jiangzhx commented on code in PR #5907:
URL: https://github.com/apache/arrow-datafusion/pull/5907#discussion_r1166651258
##########
datafusion/core/tests/sql/subqueries.rs:
##########
@@ -129,12 +129,14 @@ async fn exists_subquery_with_same_table() -> Result<()> {
let expected = vec![
"Explain [plan_type:Utf8, plan:Utf8]",
- " Filter: EXISTS (<subquery>) [t1_id:UInt32;N, t1_name:Utf8;N,
t1_int:UInt32;N]",
- " Subquery: [t1_int:UInt32;N]",
- " Projection: t1.t1_int [t1_int:UInt32;N]",
- " Filter: t1.t1_id > t1.t1_int [t1_id:UInt32;N, t1_name:Utf8;N,
t1_int:UInt32;N]",
- " TableScan: t1 [t1_id:UInt32;N, t1_name:Utf8;N,
t1_int:UInt32;N]",
- " TableScan: t1 projection=[t1_id, t1_name, t1_int]
[t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]",
+ " Projection: t1.t1_id, t1.t1_name, t1.t1_int [t1_id:UInt32;N,
t1_name:Utf8;N, t1_int:UInt32;N]",
+ " CrossJoin: [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N,
COUNT(UInt8(1)):Int64;N]",
+ " TableScan: t1 projection=[t1_id, t1_name, t1_int]
[t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]",
+ " SubqueryAlias: __scalar_sq_1 [COUNT(UInt8(1)):Int64;N]",
+ " Filter: COUNT(UInt8(1)) > Int64(0) [COUNT(UInt8(1)):Int64;N]",
+ " Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1))]]
[COUNT(UInt8(1)):Int64;N]",
+ " Filter: t1.t1_id > t1.t1_int [t1_id:UInt32;N,
t1_int:UInt32;N]",
+ " TableScan: t1 projection=[t1_id, t1_int]
[t1_id:UInt32;N, t1_int:UInt32;N]",
Review Comment:
> actually do an aggregate to count how many rows
As Spark does, the better way is:
```
WHERE EXISTS (SELECT b FROM t2 );
rewrite to
WHERE (SELECT 1 FROM (SELECT b FROM t2) LIMIT 1) IS NOT NULL
```
However, the ScalarSubqueryToJoin optimizer currently cannot rewrite this into a join.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]