mkmik opened a new issue, #4123:
URL: https://github.com/apache/arrow-datafusion/issues/4123
**Describe the bug**
Some statements that involve subqueries with an alias, are not evaluated
correctly when the subquery contains both `group by` and `order by`.
**To Reproduce**
With `datafusion-cli`:
```console
❯ select a from (select a from (select 1 as a) as foo group by 1 order by a)
as c;
SchemaError(FieldNotFound { field: Column { relation: Some("foo"), name: "a"
}, valid_fields: Some([Column { relation: Some("c"), name: "a" }]) })
```
**Expected behavior**
```console
❯ select a from (select a from (select 1 as a) as foo group by 1 order by a)
as c;
+---+
| a |
+---+
| 1 |
+---+
1 row in set. Query took 0.005 seconds.
```
**Additional context**
Interestingly if I remove as c in the outer expression it works:
```console
❯ select a from (select a from (select 1 as a) as foo group by 1 order by a);
+---+
| a |
+---+
| 1 |
+---+
1 row in set. Query took 0.005 seconds.
```
It's also interesting that using positional column references in the order
by works:
```console
❯ select a from (select a from (select 1 as a) as foo group by 1 order by 1)
as c;
+---+
| a |
+---+
| 1 |
+---+
1 row in set. Query took 0.005 seconds.
```
Furthermore, the bug reproduces only if `group by a` and `order by a` are
*both* present:
```console
❯ select a from (select a from (select 1 as a) as foo group by a) as c;
+---+
| a |
+---+
| 1 |
+---+
1 row in set. Query took 0.005 seconds.
❯ select a from (select a from (select 1 as a) as foo order by a) as c;
+---+
| a |
+---+
| 1 |
+---+
1 row in set. Query took 0.004 seconds.
```
I tried to look at the parsed SQL AST to see if the bug was in the parser or
in the way the AST is interpreted (in order to know where to file the bug):
I ran:
```rust
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;
fn main() {
let dialect = PostgreSqlDialect {}; // or AnsiDialect, or your own
dialect ...
let sql = "select a from (select a from (select 1 as a) as foo group by
a order by 1)";
let ast = Parser::parse_sql(&dialect, sql).unwrap();
println!("NUM: {:?}", ast[0]);
let sql = "select a from (select a from (select 1 as a) as foo group by
a order by a)";
let ast = Parser::parse_sql(&dialect, sql).unwrap();
println!("SYM: {:?}", ast[0]);
}
```
and got the following output:
```
NUM: Query(Query { with: None, body: Select(Select { distinct: false, top:
None, projection: [UnnamedExpr(Identifier(Ident { value: "a", quote_style: None
}))], into: None, from: [TableWithJoins { relation: Derived { lateral: false,
subquery: Query { with: None, body: Select(Select { distinct: false, top: None,
projection: [UnnamedExpr(Identifier(Ident { value: "a", quote_style: None }))],
into: None, from: [TableWithJoins { relation: Derived { lateral: false,
subquery: Query { with: None, body: Select(Select { distinct: false, top: None,
projection: [ExprWithAlias { expr: Value(Number("1", false)), alias: Ident {
value: "a", quote_style: None } }], into: None, from: [], lateral_views: [],
selection: None, group_by: [], cluster_by: [], distribute_by: [], sort_by: [],
having: None, qualify: None }), order_by: [], limit: None, offset: None, fetch:
None, lock: None }, alias: Some(TableAlias { name: Ident { value: "foo",
quote_style: None }, columns: [] }) }, joins: [] }], lateral_vi
ews: [], selection: None, group_by: [Identifier(Ident { value: "a",
quote_style: None })], cluster_by: [], distribute_by: [], sort_by: [], having:
None, qualify: None }), order_by: [OrderByExpr { expr: Value(Number("1",
false)), asc: None, nulls_first: None }], limit: None, offset: None, fetch:
None, lock: None }, alias: None }, joins: [] }], lateral_views: [], selection:
None, group_by: [], cluster_by: [], distribute_by: [], sort_by: [], having:
None, qualify: None }), order_by: [], limit: None, offset: None, fetch: None,
lock: None })
SYM: Query(Query { with: None, body: Select(Select { distinct: false, top:
None, projection: [UnnamedExpr(Identifier(Ident { value: "a", quote_style: None
}))], into: None, from: [TableWithJoins { relation: Derived { lateral: false,
subquery: Query { with: None, body: Select(Select { distinct: false, top: None,
projection: [UnnamedExpr(Identifier(Ident { value: "a", quote_style: None }))],
into: None, from: [TableWithJoins { relation: Derived { lateral: false,
subquery: Query { with: None, body: Select(Select { distinct: false, top: None,
projection: [ExprWithAlias { expr: Value(Number("1", false)), alias: Ident {
value: "a", quote_style: None } }], into: None, from: [], lateral_views: [],
selection: None, group_by: [], cluster_by: [], distribute_by: [], sort_by: [],
having: None, qualify: None }), order_by: [], limit: None, offset: None, fetch:
None, lock: None }, alias: Some(TableAlias { name: Ident { value: "foo",
quote_style: None }, columns: [] }) }, joins: [] }], lateral_vie
ws: [], selection: None, group_by: [Identifier(Ident { value: "a",
quote_style: None })], cluster_by: [], distribute_by: [], sort_by: [], having:
None, qualify: None }), order_by: [OrderByExpr { expr: Identifier(Ident {
value: "a", quote_style: None }), asc: None, nulls_first: None }], limit: None,
offset: None, fetch: None, lock: None }, alias: None }, joins: [] }],
lateral_views: [], selection: None, group_by: [], cluster_by: [],
distribute_by: [], sort_by: [], having: None, qualify: None }), order_by: [],
limit: None, offset: None, fetch: None, lock: None })
```
wdiff shows only a difference in the `OrderByExpr`:
```
OrderByExpr { expr: [-Value(Number("1", false)),-] {+Identifier(Ident {
value: "a", quote_style: None }),+} asc: None, nulls_first: None }
```
I think this shows that `sqlparser` is parsing the statement correctly and
that the bug is in datafusion.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]