Github user nsyca commented on a diff in the pull request:
https://github.com/apache/spark/pull/16915#discussion_r101375513
--- Diff:
sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-joins.sql.out
---
@@ -0,0 +1,229 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 9
+
+
+-- !query 0
+create temporary view t1 as select * from values
+ ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04
01:00:00.000', date '2014-04-04'),
+ ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', date '2014-05-04'),
+ ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04
01:02:00.001', date '2014-06-04'),
+ ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04
01:01:00.000', date '2014-07-04'),
+ ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04
01:02:00.001', date '2014-05-05'),
+ ("val1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04
01:01:00.000', null),
+ ("val1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04
01:02:00.001', null),
+ ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04
01:01:00.000', date '2014-08-04'),
+ ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04
01:02:00.001', date '2014-09-04'),
+ ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04
01:01:00.000', date '2015-05-04'),
+ ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04
01:02:00.001', date '2014-04-04'),
+ ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', date '2014-05-04')
+ as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+create temporary view t2 as select * from values
+ ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04
01:01:00.000', date '2014-04-04'),
+ ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', date '2014-05-04'),
+ ("val1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04
01:01:00.000', date '2015-05-04'),
+ ("val1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04
01:01:00.000', date '2016-05-04'),
+ ("val1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04
01:01:00.000', null),
+ ("val2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04
01:01:00.000', date '2014-06-04'),
+ ("val1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', date '2014-05-04'),
+ ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04
01:01:00.000', date '2014-06-04'),
+ ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04
01:01:00.000', date '2014-07-04'),
+ ("val1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04
01:01:00.000', date '2014-08-05'),
+ ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04
01:01:00.000', date '2014-09-04'),
+ ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04
01:01:00.000', date '2014-10-04'),
+ ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04
01:01:00.000', null)
+ as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i)
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+create temporary view t3 as select * from values
+ ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04
01:02:00.000', date '2014-04-04'),
+ ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04
01:02:00.000', date '2014-05-04'),
+ ("val1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04
01:02:00.000', date '2014-05-04'),
+ ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04
01:02:00.000', date '2014-05-04'),
+ ("val1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04
01:02:00.000', date '2014-06-04'),
+ ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04
01:02:00.000', date '2014-07-04'),
+ ("val3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04
01:02:00.000', date '2014-08-04'),
+ ("val3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04
01:02:00.000', date '2014-09-05'),
+ ("val1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04
01:02:00.000', null),
+ ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04
01:02:00.000', null),
+ ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04
01:02:00.000', date '2014-05-04'),
+ ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04
01:02:00.000', date '2015-05-04')
+ as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i)
+-- !query 2 schema
+struct<>
+-- !query 2 output
+
+
+
+-- !query 3
+SELECT t1a,
+ t1b,
+ t1c,
+ t3a,
+ t3b,
+ t3c
+FROM t1
+ JOIN t3
+WHERE t1a NOT IN (SELECT t2a
+ FROM t2)
+ AND t1b = t3b
+-- !query 3 schema
+struct<t1a:string,t1b:smallint,t1c:int,t3a:string,t3b:smallint,t3c:int>
+-- !query 3 output
+val1a 6 8 val3a 6 12
+val1a 6 8 val3a 6 12
+val1a 6 8 val3a 6 12
+val1a 6 8 val3a 6 12
+val1d 10 NULL val1b 10 12
+val1d 10 NULL val1b 10 12
+
+
+-- !query 4
+SELECT t1a,
+ t1b,
+ t1c,
+ count(distinct(t3a)),
+ t3b,
+ t3c
+FROM t1
+FULL OUTER JOIN t3 on t1b != t3b
+RIGHT JOIN t2 on t1c = t2c
+where t1a NOT IN
+ (
+ SELECT t2a
+ FROM t2
+ WHERE t2c NOT IN
+ (
+ SELECT t1c
+ FROM t1
+ WHERE t1a = t2a))
+AND t1b != t3b
+AND t1d = t2d
+GROUP BY t1a, t1b, t1c, t3a, t3b, t3c
+HAVING count(distinct(t3a)) >= 1
+ORDER BY t1a
+-- !query 4 schema
+struct<t1a:string,t1b:smallint,t1c:int,count(DISTINCT
t3a):bigint,t3b:smallint,t3c:int>
+-- !query 4 output
+val1c 8 16 1 10 12
+val1c 8 16 1 6 12
+val1c 8 16 1 17 16
+
+
+-- !query 5
+SELECT t1a,
+ t1b,
+ t1c,
+ t1d,
+ t1h
+FROM t1
+WHERE t1a NOT IN
+ (
+ SELECT t2a
+ FROM t2
+ LEFT JOIN t3 on t2b = t3b
+ WHERE t1d = t2d
+ )
+AND t1d NOT IN
+ (
+ SELECT t2d
+ FROM t2
+ RIGHT JOIN t1 on t2e = t1e
+ WHERE t1a = t2a)
+-- !query 5 schema
+struct<t1a:string,t1b:smallint,t1c:int,t1d:bigint,t1h:timestamp>
+-- !query 5 output
+val1a 16 12 10 2014-07-04 01:01:00
+val1a 16 12 21 2014-06-04 01:02:00.001
+val1a 6 8 10 2014-04-04 01:00:00
+val1a 6 8 10 2014-04-04 01:02:00.001
+val1d 10 NULL 12 2015-05-04 01:01:00
+val1d NULL 16 22 2014-06-04 01:01:00
+val1e 10 NULL 25 2014-08-04 01:01:00
+
+
+-- !query 6
+SELECT Count(DISTINCT( t1a )),
+ t1b,
+ t1c,
+ t1d
+FROM t1
+WHERE t1a NOT IN (SELECT t2a
+ FROM t2
+ JOIN t1
+ WHERE t2b <> t1b)
+GROUP BY t1b,
+ t1c,
+ t1d
+HAVING t1d NOT IN (SELECT t2d
+ FROM t2
+ WHERE t1d = t2d)
+ORDER BY t1b DESC
+-- !query 6 schema
+struct<count(DISTINCT t1a):bigint,t1b:smallint,t1c:int,t1d:bigint>
+-- !query 6 output
+1 16 12 10
+1 16 12 21
+1 10 NULL 12
+1 6 8 10
+1 NULL 16 22
+
+
+-- !query 7
+SELECT COUNT(DISTINCT(t1a)),
+ t1b,
+ t1c,
+ t1d
+FROM t1
+WHERE t1a NOT IN
+ (
+ SELECT t2a
+ FROM t2 INNER
+ JOIN t1 ON t1a = t2a)
+GROUP BY t1b,
+ t1c,
+ t1d
+HAVING t1b < sum(t1c)
+-- !query 7 schema
+struct<count(DISTINCT t1a):bigint,t1b:smallint,t1c:int,t1d:bigint>
+-- !query 7 output
+1 6 8 10
+
+
+-- !query 8
+SELECT COUNT(DISTINCT(t1a)),
+ t1b,
+ t1c,
+ t1d
+FROM t1
+WHERE t1a NOT IN
+ (
+ SELECT t2a
+ FROM t2 INNER
+ JOIN t1
+ ON t1a = t2a)
+AND t1d NOT IN
+ (
+ SELECT t2d
+ FROM t2
+ INNER JOIN t3
+ ON t2b = t3b )
+GROUP BY t1b,
+ t1c,
+ t1d
+HAVING t1b < sum(t1c)
+-- !query 8 schema
+struct<count(DISTINCT t1a):bigint,t1b:smallint,t1c:int,t1d:bigint>
+-- !query 8 output
+1 6 8 10
--- End diff --
All the results are equivalent with the ones from DB2.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]