Github user nsyca commented on a diff in the pull request:
https://github.com/apache/spark/pull/16841#discussion_r100077204
--- Diff:
sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-joins.sql.out
---
@@ -0,0 +1,353 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 14
+
+
+-- !query 0
+create temporary view t1 as select * from values
+ ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04
01:00:00.000', date '2014-04-04'),
+ ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', date '2014-05-04'),
+ ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04
01:02:00.001', date '2014-06-04'),
+ ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04
01:01:00.000', date '2014-07-04'),
+ ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04
01:02:00.001', date '2014-05-05'),
+ ("val1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04
01:01:00.000', null),
+ ("val1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04
01:02:00.001', null),
+ ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04
01:01:00.000', date '2014-08-04'),
+ ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04
01:02:00.001', date '2014-09-04'),
+ ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04
01:01:00.000', date '2015-05-04'),
+ ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04
01:02:00.001', date '2014-04-04'),
+ ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', date '2014-05-04')
+ as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+create temporary view t2 as select * from values
+ ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04
01:01:00.000', date '2014-04-04'),
+ ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', date '2014-05-04'),
+ ("val1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04
01:01:00.000', date '2015-05-04'),
+ ("val1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04
01:01:00.000', date '2016-05-04'),
+ ("val1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04
01:01:00.000', null),
+ ("val2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04
01:01:00.000', date '2014-06-04'),
+ ("val1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', date '2014-05-04'),
+ ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04
01:01:00.000', date '2014-06-04'),
+ ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04
01:01:00.000', date '2014-07-04'),
+ ("val1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04
01:01:00.000', date '2014-08-05'),
+ ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04
01:01:00.000', date '2014-09-04'),
+ ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04
01:01:00.000', date '2014-10-04'),
+ ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', null)
+ as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i)
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+create temporary view t3 as select * from values
+ ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04
01:02:00.000', date '2014-04-04'),
+ ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04
01:02:00.000', date '2014-05-04'),
+ ("val1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04
01:02:00.000', date '2014-05-04'),
+ ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04
01:02:00.000', date '2014-05-04'),
+ ("val1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04
01:02:00.000', date '2014-06-04'),
+ ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04
01:02:00.000', date '2014-07-04'),
+ ("val3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04
01:02:00.000', date '2014-08-04'),
+ ("val3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04
01:02:00.000', date '2014-09-05'),
+ ("val1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04
01:02:00.000', null),
+ ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04
01:02:00.000', null),
+ ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04
01:02:00.000', date '2014-05-04'),
+ ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04
01:02:00.000', date '2015-05-04')
+ as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i)
+-- !query 2 schema
+struct<>
+-- !query 2 output
+
+
+
+-- !query 3
+SELECT t1a, t1b, t1c, t3a, t3b, t3c
+FROM t1 natural JOIN t3
+WHERE t1a IN (SELECT t2a
+ FROM t2
+ WHERE t1a = t2a)
+ AND t1b = t3b
+ AND t1a = t3a
+ORDER BY t1a,
+ t1b,
+ t1c DESC nulls first
+-- !query 3 schema
+struct<t1a:string,t1b:smallint,t1c:int,t3a:string,t3b:smallint,t3c:int>
+-- !query 3 output
+val1b 8 16 val1b 8 16
+val1b 8 16 val1b 8 16
+
+
+-- !query 4
+SELECT Count(DISTINCT(t1a)),
+ t1b,
+ t3a,
+ t3b,
+ t3c
+FROM t1 natural left JOIN t3
+WHERE t1a IN
+ (
+ SELECT t2a
+ FROM t2
+ WHERE t1d = t2d)
+AND t1b > t3b
+GROUP BY t1a,
+ t1b,
+ t3a,
+ t3b,
+ t3c
+ORDER BY t1a DESC
+-- !query 4 schema
+struct<count(DISTINCT
t1a):bigint,t1b:smallint,t3a:string,t3b:smallint,t3c:int>
+-- !query 4 output
+1 10 val3b 8 NULL
+1 10 val1b 8 16
+1 10 val3a 6 12
+1 8 val3a 6 12
+1 8 val3a 6 12
+
+
+-- !query 5
+SELECT Count(DISTINCT(t1a))
+FROM t1 natural right JOIN t3
+WHERE t1a IN
+ (
+ SELECT t2a
+ FROM t2
+ WHERE t1b = t2b)
+AND t1d IN
+ (
+ SELECT t2d
+ FROM t2
+ WHERE t1c > t2c)
+AND t1a = t3a
+GROUP BY t1a
+ORDER BY t1a
+-- !query 5 schema
+struct<count(DISTINCT t1a):bigint>
+-- !query 5 output
+1
+
+
+-- !query 6
+SELECT t1a,
+ t1b,
+ t1c,
+ t3a,
+ t3b,
+ t3c
+FROM t1 FULL OUTER JOIN t3
+where t1a IN
+ (
+ SELECT t2a
+ FROM t2
+ WHERE t2c IS NOT NULL)
+AND t1b != t3b
+AND t1a = 'val1b'
+ORDER BY t1a
+-- !query 6 schema
+struct<t1a:string,t1b:smallint,t1c:int,t3a:string,t3b:smallint,t3c:int>
+-- !query 6 output
+val1b 8 16 val3a 6 12
+val1b 8 16 val3a 6 12
+val1b 8 16 val1b 10 12
+val1b 8 16 val1b 10 12
+val1b 8 16 val3c 17 16
+val1b 8 16 val3c 17 16
+
+
+-- !query 7
+SELECT Count(DISTINCT(t1a)),
+ t1b
+FROM t1 RIGHT JOIN t3
+where t1a IN
+ (
+ SELECT t2a
+ FROM t2
+ WHERE t2h > t3h)
+AND t3a IN
+ (
+ SELECT t2a
+ FROM t2
+ WHERE t2c > t3c)
+AND t1h >= t3h
+GROUP BY t1a,
+ t1b
+HAVING t1b > 8
+ORDER BY t1a
+-- !query 7 schema
+struct<count(DISTINCT t1a):bigint,t1b:smallint>
+-- !query 7 output
+1 10
+
+
+-- !query 8
+SELECT Count(DISTINCT(t1a))
+FROM t1 LEFT OUTER
+JOIN t3
+ON t1a = t3a
+WHERE t1a IN
+ (
+ SELECT t2a
+ FROM t2
+ WHERE t1h < t2h )
+GROUP BY t1a
+ORDER BY t1a
+-- !query 8 schema
+struct<count(DISTINCT t1a):bigint>
+-- !query 8 output
+1
+1
+1
+
+
+-- !query 9
+SELECT Count(DISTINCT(t1a)),
+ t1b
+FROM t1 INNER JOIN t2
+ON t1a > t2a
+WHERE t1b IN
+ (
+ SELECT t2b
+ FROM t2
+ WHERE t2h > t1h)
+OR t1a IN
+ (
+ SELECT t2a
+ FROM t2
+ WHERE t2h < t1h)
+GROUP BY t1b
+HAVING t1b > 6
+-- !query 9 schema
+struct<count(DISTINCT t1a):bigint,t1b:smallint>
+-- !query 9 output
+1 10
+1 8
+
+
+-- !query 10
+SELECT Count(DISTINCT(t1a)),
+ t1b
+FROM t1
+WHERE t1a IN
+ (
+ SELECT t2a
+ FROM t2
+ JOIN t1
+ WHERE t2b <> t1b)
+AND t1h IN
+ (
+ SELECT t2h
+ FROM t2
+ RIGHT JOIN t3
+ where t2b = t3b)
+GROUP BY t1b
+HAVING t1b > 8
+-- !query 10 schema
+struct<count(DISTINCT t1a):bigint,t1b:smallint>
+-- !query 10 output
+1 10
+
+
+-- !query 11
+SELECT Count(DISTINCT(t1a)),
+ t1b
+FROM t1
+WHERE t1a IN
+ (
+ SELECT t2a
+ FROM t2
+ JOIN t1
+ WHERE t2b <> t1b)
+AND t1h IN
+ (
+ SELECT t2h
+ FROM t2
+ RIGHT JOIN t3
+ where t2b = t3b)
+AND t1b IN
+ (
+ SELECT t2b
+ FROM t2
+ FULL OUTER JOIN t3
+ where t2b = t3b)
+
+GROUP BY t1b
+HAVING t1b > 8
+-- !query 11 schema
+struct<count(DISTINCT t1a):bigint,t1b:smallint>
+-- !query 11 output
+1 10
+
+
+-- !query 12
+SELECT Count(DISTINCT(t1a)),
+ t1b
+FROM t1
+INNER JOIN t2 on t1b = t2b
+RIGHT JOIN t3 ON t1a = t3a
+where t1a IN
+ (
+ SELECT t2a
+ FROM t2
+ FULL OUTER JOIN t3
+ WHERE t2b > t3b)
+AND t1c IN
+ (
+ SELECT t3c
+ FROM t3
+ LEFT OUTER JOIN t2
+ ON t3a = t2a )
+AND t1b IN
+ (
+ SELECT t3b
+ FROM t3 LEFT OUTER
+ JOIN t1
+ WHERE t3c = t1c)
+
+AND t1a = t2a
+GROUP BY t1b
+ORDER BY t1b DESC
+-- !query 12 schema
+struct<count(DISTINCT t1a):bigint,t1b:smallint>
+-- !query 12 output
+1 8
+
+
+-- !query 13
+SELECT t1a,
+ t1b,
+ t1c,
+ count(distinct(t2a)),
+ t2b,
+ t2c
+FROM t1
+FULL JOIN t2 on t1a = t2a
+RIGHT JOIN t3 on t1a = t3a
+where t1a IN
+ (
+ SELECT t2a
+ FROM t2 INNER
+ JOIN t3
+ ON t2b < t3b
+ WHERE t2c IN
+ (
+ SELECT t1c
+ FROM t1
+ WHERE t1a = t2a))
+and t1a = t2a
+Group By t1a, t1b, t1c, t2a, t2b, t2c
+HAVING t2c IS NOT NULL
+ORDER By t2b DESC nulls last
+-- !query 13 schema
+struct<t1a:string,t1b:smallint,t1c:int,count(DISTINCT
t2a):bigint,t2b:smallint,t2c:int>
+-- !query 13 output
+val1b 8 16 1 10 12
+val1b 8 16 1 8 16
+val1b 8 16 1 NULL 16
--- End diff --
The results match with the ones from DB2.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]