alamb commented on code in PR #15090: URL: https://github.com/apache/datafusion/pull/15090#discussion_r1987858416
########## datafusion/sqllogictest/test_files/joins.slt: ########## @@ -4541,3 +4541,269 @@ DROP TABLE test statement ok set datafusion.execution.target_partitions = 1; + +# test using_join_multiple_keys_subquery +statement count 0 +create table person(id int, age int, state int); + +statement count 0 +create table lineitem(c1 int); + +query TT +explain SELECT * FROM person a join person b using (id, age); +---- +logical_plan +01)Projection: a.id, a.age, a.state, b.state +02)--Inner Join: a.id = b.id, a.age = b.age +03)----SubqueryAlias: a +04)------TableScan: person projection=[id, age, state] +05)----SubqueryAlias: b +06)------TableScan: person projection=[id, age, state] +physical_plan +01)CoalesceBatchesExec: target_batch_size=3 +02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0), (age@1, age@1)], projection=[id@0, age@1, state@2, state@5] +03)----DataSourceExec: partitions=1, partition_sizes=[0] +04)----DataSourceExec: partitions=1, partition_sizes=[0] + +query TT +explain SELECT age FROM (SELECT * FROM person a join person b using (id, age, state)); +---- +logical_plan +01)Projection: a.age +02)--Inner Join: a.id = b.id, a.age = b.age, a.state = b.state +03)----SubqueryAlias: a +04)------TableScan: person projection=[id, age, state] +05)----SubqueryAlias: b +06)------TableScan: person projection=[id, age, state] +physical_plan +01)CoalesceBatchesExec: target_batch_size=3 +02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0), (age@1, age@1), (state@2, state@2)], projection=[age@1] +03)----DataSourceExec: partitions=1, partition_sizes=[0] +04)----DataSourceExec: partitions=1, partition_sizes=[0] + +query TT +explain SELECT a.* FROM person a join person b using (id, age); +---- +logical_plan +01)Projection: a.id, a.age, a.state +02)--Inner Join: a.id = b.id, a.age = b.age +03)----SubqueryAlias: a +04)------TableScan: person projection=[id, age, state] +05)----SubqueryAlias: b +06)------TableScan: person projection=[id, age] +physical_plan +01)CoalesceBatchesExec: target_batch_size=3 +02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0), (age@1, age@1)], projection=[id@0, age@1, state@2] +03)----DataSourceExec: partitions=1, partition_sizes=[0] +04)----DataSourceExec: partitions=1, partition_sizes=[0] + +query TT +explain SELECT a.*, b.* FROM person a join person b using (id, age); +---- +logical_plan +01)Inner Join: a.id = b.id, a.age = b.age +02)--SubqueryAlias: a +03)----TableScan: person projection=[id, age, state] +04)--SubqueryAlias: b +05)----TableScan: person projection=[id, age, state] +physical_plan +01)CoalesceBatchesExec: target_batch_size=3 +02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0), (age@1, age@1)] +03)----DataSourceExec: partitions=1, partition_sizes=[0] +04)----DataSourceExec: partitions=1, partition_sizes=[0] + +query TT +explain SELECT * FROM person a join person b using (id, age, state) join person c using (id, age, state); +---- +logical_plan +01)Projection: a.id, a.age, a.state +02)--Inner Join: a.id = c.id, a.age = c.age, a.state = c.state +03)----Projection: a.id, a.age, a.state +04)------Inner Join: a.id = b.id, a.age = b.age, a.state = b.state +05)--------SubqueryAlias: a +06)----------TableScan: person projection=[id, age, state] +07)--------SubqueryAlias: b +08)----------TableScan: person projection=[id, age, state] +09)----SubqueryAlias: c +10)------TableScan: person projection=[id, age, state] +physical_plan +01)CoalesceBatchesExec: target_batch_size=3 +02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0), (age@1, age@1), (state@2, state@2)], projection=[id@0, age@1, state@2] +03)----CoalesceBatchesExec: target_batch_size=3 +04)------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0), (age@1, age@1), (state@2, state@2)], projection=[id@0, age@1, state@2] +05)--------DataSourceExec: partitions=1, partition_sizes=[0] +06)--------DataSourceExec: partitions=1, partition_sizes=[0] +07)----DataSourceExec: partitions=1, partition_sizes=[0] + +query TT +explain SELECT * FROM person a NATURAL JOIN lineitem b; Review Comment: it is very nice to port these tests to sqllogictests ########## datafusion/sqllogictest/test_files/order.slt: ########## @@ -985,13 +985,20 @@ drop table ambiguity_test; statement ok create table t(a0 int, a int, b int, c int) as values (1, 2, 3, 4), (5, 6, 7, 8); -# expect this query to run successfully, not error -query III +# b is not selected in subquery +query error DataFusion error: Schema error: No field named b\. Valid fields are t1\.c, t1\.a, t1\.a0\. select * from (select c, a, NULL::int as a0 from t order by a, c) t1 union all select * from (select c, NULL::int as a, a0 from t order by a0, c) t2 order by c, a, a0, b Review Comment: I agree the query is invalid (and postgres agrees). I made a PR to fix it here: - https://github.com/apache/datafusion/pull/15131 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
