Repository: spark
Updated Branches:
  refs/heads/master 601b9c3e6 -> d22db6278


[SPARK-18871][SQL][TESTS] New test cases for IN/NOT IN subquery 2nd batch

## What changes were proposed in this pull request?

This is the 2nd batch of test cases for IN/NOT IN subqueries. This PR adds
the following test files:
`in-limit.sql`
`in-order-by.sql`
`not-in-group-by.sql`

These are the queries and the results from running them on DB2:
[in-limit DB2 
version](https://github.com/apache/spark/files/743267/in-limit.sql.db2.out.txt)
[in-order-by DB2 
version](https://github.com/apache/spark/files/743269/in-order-by.sql.db2.txt)
[not-in-group-by DB2 
version](https://github.com/apache/spark/files/743271/not-in-group-by.sql.db2.txt)
[output of in-limit.sql 
DB2](https://github.com/apache/spark/files/743276/in-limit.sql.db2.out.txt)
[output of in-order-by.sql 
DB2](https://github.com/apache/spark/files/743278/in-order-by.sql.db2.out.txt)
[output of not-in-group-by.sql 
DB2](https://github.com/apache/spark/files/743279/not-in-group-by.sql.db2.out.txt)

## How was this patch tested?

This PR adds new test cases.

Author: Kevin Yu <q...@us.ibm.com>

Closes #16759 from kevinyu98/spark-18871-2.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d22db627
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d22db627
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d22db627

Branch: refs/heads/master
Commit: d22db62785b74f433c51b07605b60126ccaa4d6d
Parents: 601b9c3
Author: Kevin Yu <q...@us.ibm.com>
Authored: Wed Feb 15 17:28:42 2017 +0100
Committer: Herman van Hovell <hvanhov...@databricks.com>
Committed: Wed Feb 15 17:28:42 2017 +0100

----------------------------------------------------------------------
 .../inputs/subquery/in-subquery/in-limit.sql    | 100 ++++++
 .../inputs/subquery/in-subquery/in-order-by.sql | 197 +++++++++++
 .../subquery/in-subquery/not-in-group-by.sql    | 101 ++++++
 .../in-subquery/not-in-multiple-columns.sql     |  55 ----
 .../subquery/in-subquery/in-limit.sql.out       | 147 +++++++++
 .../subquery/in-subquery/in-order-by.sql.out    | 328 +++++++++++++++++++
 .../in-subquery/not-in-group-by.sql.out         | 150 +++++++++
 .../in-subquery/not-in-multiple-columns.sql.out |  59 ----
 8 files changed, 1023 insertions(+), 114 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/d22db627/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql
 
b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql
new file mode 100644
index 0000000..a40ee08
--- /dev/null
+++ 
b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql
@@ -0,0 +1,100 @@
+-- A test suite for IN LIMIT in parent side, subquery, and both predicate 
subquery
+-- It includes correlated cases.
+
+create temporary view t1 as select * from values
+  ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 
01:00:00.000', date '2014-04-04'),
+  ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
+  ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 
01:02:00.001', date '2014-06-04'),
+  ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 
01:01:00.000', date '2014-07-04'),
+  ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 
01:02:00.001', date '2014-05-05'),
+  ("val1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', null),
+  ("val1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 
01:02:00.001', null),
+  ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 
01:01:00.000', date '2014-08-04'),
+  ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 
01:02:00.001', date '2014-09-04'),
+  ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 
01:01:00.000', date '2015-05-04'),
+  ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 
01:02:00.001', date '2014-04-04'),
+  ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04')
+  as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i);
+
+create temporary view t2 as select * from values
+  ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 
01:01:00.000', date '2014-04-04'),
+  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
+  ("val1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 
01:01:00.000', date '2015-05-04'),
+  ("val1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 
01:01:00.000', date '2016-05-04'),
+  ("val1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 
01:01:00.000', null),
+  ("val2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', date '2014-06-04'),
+  ("val1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
+  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', date '2014-06-04'),
+  ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 
01:01:00.000', date '2014-07-04'),
+  ("val1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 
01:01:00.000', date '2014-08-05'),
+  ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 
01:01:00.000', date '2014-09-04'),
+  ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 
01:01:00.000', date '2014-10-04'),
+  ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', null)
+  as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i);
+
+create temporary view t3 as select * from values
+  ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 
01:02:00.000', date '2014-04-04'),
+  ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
+  ("val1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
+  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
+  ("val1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:02:00.000', date '2014-06-04'),
+  ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 
01:02:00.000', date '2014-07-04'),
+  ("val3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 
01:02:00.000', date '2014-08-04'),
+  ("val3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 
01:02:00.000', date '2014-09-05'),
+  ("val1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 
01:02:00.000', null),
+  ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 
01:02:00.000', null),
+  ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
+  ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 
01:02:00.000', date '2015-05-04')
+  as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i);
+
+-- correlated IN subquery
+-- LIMIT in parent side
+-- TC 01.01
+SELECT *
+FROM   t1
+WHERE  t1a IN (SELECT t2a
+               FROM   t2
+               WHERE  t1d = t2d)
+LIMIT  2;
+
+-- TC 01.02
+SELECT *
+FROM   t1
+WHERE  t1c IN (SELECT t2c
+               FROM   t2
+               WHERE  t2b >= 8
+               LIMIT  2)
+LIMIT 4;
+
+-- TC 01.03
+SELECT Count(DISTINCT( t1a )),
+       t1b
+FROM   t1
+WHERE  t1d IN (SELECT t2d
+               FROM   t2
+               ORDER  BY t2c
+               LIMIT 2)
+GROUP  BY t1b
+ORDER  BY t1b DESC NULLS FIRST
+LIMIT  1;
+
+-- LIMIT with NOT IN
+-- TC 01.04
+SELECT *
+FROM   t1
+WHERE  t1b NOT IN (SELECT t2b
+                   FROM   t2
+                   WHERE  t2b > 6
+                   LIMIT  2);
+
+-- TC 01.05
+SELECT Count(DISTINCT( t1a )),
+       t1b
+FROM   t1
+WHERE  t1d NOT IN (SELECT t2d
+                   FROM   t2
+                   ORDER  BY t2b DESC nulls first
+                   LIMIT 1)
+GROUP  BY t1b
+ORDER BY t1b NULLS last
+LIMIT  1;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/d22db627/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql
 
b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql
new file mode 100644
index 0000000..892e39f
--- /dev/null
+++ 
b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql
@@ -0,0 +1,197 @@
+-- A test suite for ORDER BY in parent side, subquery, and both predicate 
subquery
+-- It includes correlated cases.
+
+create temporary view t1 as select * from values
+  ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 
01:00:00.000', date '2014-04-04'),
+  ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
+  ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 
01:02:00.001', date '2014-06-04'),
+  ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 
01:01:00.000', date '2014-07-04'),
+  ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 
01:02:00.001', date '2014-05-05'),
+  ("val1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', null),
+  ("val1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 
01:02:00.001', null),
+  ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 
01:01:00.000', date '2014-08-04'),
+  ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 
01:02:00.001', date '2014-09-04'),
+  ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 
01:01:00.000', date '2015-05-04'),
+  ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 
01:02:00.001', date '2014-04-04'),
+  ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04')
+  as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i);
+
+create temporary view t2 as select * from values
+  ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 
01:01:00.000', date '2014-04-04'),
+  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
+  ("val1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 
01:01:00.000', date '2015-05-04'),
+  ("val1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 
01:01:00.000', date '2016-05-04'),
+  ("val1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 
01:01:00.000', null),
+  ("val2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', date '2014-06-04'),
+  ("val1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
+  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', date '2014-06-04'),
+  ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 
01:01:00.000', date '2014-07-04'),
+  ("val1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 
01:01:00.000', date '2014-08-05'),
+  ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 
01:01:00.000', date '2014-09-04'),
+  ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 
01:01:00.000', date '2014-10-04'),
+  ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', null)
+  as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i);
+
+create temporary view t3 as select * from values
+  ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 
01:02:00.000', date '2014-04-04'),
+  ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
+  ("val1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
+  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
+  ("val1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:02:00.000', date '2014-06-04'),
+  ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 
01:02:00.000', date '2014-07-04'),
+  ("val3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 
01:02:00.000', date '2014-08-04'),
+  ("val3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 
01:02:00.000', date '2014-09-05'),
+  ("val1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 
01:02:00.000', null),
+  ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 
01:02:00.000', null),
+  ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
+  ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 
01:02:00.000', date '2015-05-04')
+  as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i);
+
+-- correlated IN subquery
+-- ORDER BY in parent side
+-- TC 01.01
+SELECT *
+FROM   t1
+WHERE  t1a IN (SELECT t2a
+               FROM   t2)
+ORDER  BY t1a;
+
+-- TC 01.02
+SELECT t1a
+FROM   t1
+WHERE  t1b IN (SELECT t2b
+               FROM   t2
+               WHERE  t1a = t2a)
+ORDER  BY t1b DESC;
+
+-- TC 01.03
+SELECT t1a,
+       t1b
+FROM   t1
+WHERE  t1c IN (SELECT t2c
+               FROM   t2
+               WHERE  t1a = t2a)
+ORDER  BY 2 DESC nulls last;
+
+-- TC 01.04
+SELECT Count(DISTINCT( t1a ))
+FROM   t1
+WHERE  t1b IN (SELECT t2b
+               FROM   t2
+               WHERE  t1a = t2a)
+ORDER  BY Count(DISTINCT( t1a ));
+
+-- ORDER BY in subquery
+-- TC 01.05
+SELECT *
+FROM   t1
+WHERE  t1b IN (SELECT t2c
+               FROM   t2
+               ORDER  BY t2d);
+
+-- ORDER BY in BOTH
+-- TC 01.06
+SELECT *
+FROM   t1
+WHERE  t1b IN (SELECT Min(t2b)
+               FROM   t2
+               WHERE  t1b = t2b
+               ORDER  BY Min(t2b))
+ORDER BY t1c DESC nulls first;
+
+-- TC 01.07
+SELECT t1a,
+       t1b,
+       t1h
+FROM   t1
+WHERE  t1c IN (SELECT t2c
+               FROM   t2
+               WHERE  t1a = t2a
+               ORDER  BY t2b DESC nulls first)
+        OR t1h IN (SELECT t2h
+                   FROM   t2
+                   WHERE  t1h > t2h)
+ORDER  BY t1h DESC nulls last;
+
+-- ORDER BY with NOT IN
+-- TC 01.08
+SELECT *
+FROM   t1
+WHERE  t1a NOT IN (SELECT t2a
+                   FROM   t2)
+ORDER  BY t1a;
+
+-- TC 01.09
+SELECT t1a,
+       t1b
+FROM   t1
+WHERE  t1a NOT IN (SELECT t2a
+                   FROM   t2
+                   WHERE  t1a = t2a)
+ORDER  BY t1b DESC nulls last;
+
+-- TC 01.10
+SELECT *
+FROM   t1
+WHERE  t1a NOT IN (SELECT t2a
+                   FROM   t2
+                   ORDER  BY t2a DESC nulls first)
+       and t1c IN (SELECT t2c
+                   FROM   t2
+                   ORDER  BY t2b DESC nulls last)
+ORDER  BY t1c DESC nulls last;
+
+-- GROUP BY and ORDER BY
+-- TC 01.11
+SELECT *
+FROM   t1
+WHERE  t1b IN (SELECT Min(t2b)
+               FROM   t2
+               GROUP  BY t2a
+               ORDER  BY t2a DESC);
+
+-- TC 01.12
+SELECT t1a,
+       Count(DISTINCT( t1b ))
+FROM   t1
+WHERE  t1b IN (SELECT Min(t2b)
+               FROM   t2
+               WHERE  t1a = t2a
+               GROUP  BY t2a
+               ORDER  BY t2a)
+GROUP  BY t1a,
+          t1h
+ORDER BY t1a;
+
+-- GROUP BY and ORDER BY with NOT IN
+-- TC 01.13
+SELECT *
+FROM   t1
+WHERE  t1b NOT IN (SELECT Min(t2b)
+                   FROM   t2
+                   GROUP  BY t2a
+                   ORDER  BY t2a);
+
+-- TC 01.14
+SELECT t1a,
+       Sum(DISTINCT( t1b ))
+FROM   t1
+WHERE  t1b NOT IN (SELECT Min(t2b)
+                   FROM   t2
+                   WHERE  t1a = t2a
+                   GROUP  BY t2c
+                   ORDER  BY t2c DESC nulls last)
+GROUP  BY t1a;
+
+-- TC 01.15
+SELECT Count(DISTINCT( t1a )),
+       t1b
+FROM   t1
+WHERE  t1h NOT IN (SELECT t2h
+                   FROM   t2
+                   where t1a = t2a
+                   order by t2d DESC nulls first
+                   )
+GROUP  BY t1a,
+          t1b
+ORDER  BY t1b DESC nulls last;

http://git-wip-us.apache.org/repos/asf/spark/blob/d22db627/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql
 
b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql
new file mode 100644
index 0000000..58cf109
--- /dev/null
+++ 
b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql
@@ -0,0 +1,101 @@
+-- A test suite for NOT IN GROUP BY in parent side, subquery, and both 
predicate subquery
+-- It includes correlated cases.
+
+create temporary view t1 as select * from values
+  ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 
01:00:00.000', date '2014-04-04'),
+  ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
+  ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 
01:02:00.001', date '2014-06-04'),
+  ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 
01:01:00.000', date '2014-07-04'),
+  ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 
01:02:00.001', date '2014-05-05'),
+  ("val1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', null),
+  ("val1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 
01:02:00.001', null),
+  ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 
01:01:00.000', date '2014-08-04'),
+  ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 
01:02:00.001', date '2014-09-04'),
+  ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 
01:01:00.000', date '2015-05-04'),
+  ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 
01:02:00.001', date '2014-04-04'),
+  ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04')
+  as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i);
+
+create temporary view t2 as select * from values
+  ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 
01:01:00.000', date '2014-04-04'),
+  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
+  ("val1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 
01:01:00.000', date '2015-05-04'),
+  ("val1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 
01:01:00.000', date '2016-05-04'),
+  ("val1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 
01:01:00.000', null),
+  ("val2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', date '2014-06-04'),
+  ("val1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
+  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', date '2014-06-04'),
+  ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 
01:01:00.000', date '2014-07-04'),
+  ("val1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 
01:01:00.000', date '2014-08-05'),
+  ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 
01:01:00.000', date '2014-09-04'),
+  ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 
01:01:00.000', date '2014-10-04'),
+  ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', null)
+  as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i);
+
+create temporary view t3 as select * from values
+  ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 
01:02:00.000', date '2014-04-04'),
+  ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
+  ("val1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
+  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
+  ("val1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:02:00.000', date '2014-06-04'),
+  ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 
01:02:00.000', date '2014-07-04'),
+  ("val3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 
01:02:00.000', date '2014-08-04'),
+  ("val3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 
01:02:00.000', date '2014-09-05'),
+  ("val1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 
01:02:00.000', null),
+  ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 
01:02:00.000', null),
+  ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
+  ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 
01:02:00.000', date '2015-05-04')
+  as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i);
+
+
+-- correlated IN subquery
+-- GROUP BY in parent side
+-- TC 01.01
+SELECT t1a,
+       Avg(t1b)
+FROM   t1
+WHERE  t1a NOT IN (SELECT t2a
+                   FROM   t2)
+GROUP  BY t1a;
+
+-- TC 01.02
+SELECT t1a,
+       Sum(DISTINCT( t1b ))
+FROM   t1
+WHERE  t1d NOT IN (SELECT t2d
+                   FROM   t2
+                   WHERE  t1h < t2h)
+GROUP  BY t1a;
+
+-- TC 01.03
+SELECT Count(*)
+FROM   (SELECT *
+        FROM   t2
+        WHERE  t2a NOT IN (SELECT t3a
+                           FROM   t3
+                           WHERE  t3h != t2h)) t2
+WHERE  t2b NOT IN (SELECT Min(t2b)
+                   FROM   t2
+                   WHERE  t2b = t2b
+                   GROUP  BY t2c);
+
+-- TC 01.04
+SELECT t1a,
+       max(t1b)
+FROM   t1
+WHERE  t1c NOT IN (SELECT Max(t2b)
+                   FROM   t2
+                   WHERE  t1a = t2a
+                   GROUP  BY t2a)
+GROUP BY t1a;
+
+-- TC 01.05
+SELECT t1a,
+       t1b
+FROM   t1
+WHERE  t1c IN (SELECT t2b
+               FROM   t2
+               WHERE  t2a NOT IN (SELECT Min(t3a)
+                                  FROM   t3
+                                  WHERE  t3a = t2a
+                                  GROUP  BY t3b) order by t2a);

http://git-wip-us.apache.org/repos/asf/spark/blob/d22db627/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-multiple-columns.sql
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-multiple-columns.sql
 
b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-multiple-columns.sql
deleted file mode 100644
index db66850..0000000
--- 
a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-multiple-columns.sql
+++ /dev/null
@@ -1,55 +0,0 @@
--- This file contains test cases for NOT IN subquery with multiple columns.
-
--- The data sets are populated as follows:
--- 1) When T1.A1 = T2.A2
---    1.1) T1.B1 = T2.B2
---    1.2) T1.B1 = T2.B2 returns false
---    1.3) T1.B1 is null
---    1.4) T2.B2 is null
--- 2) When T1.A1 = T2.A2 returns false
--- 3) When T1.A1 is null
--- 4) When T1.A2 is null
-
--- T1.A1  T1.B1     T2.A2  T2.B2
--- -----  -----     -----  -----
---     1      1         1      1    (1.1)
---     1      3                     (1.2)
---     1   null         1   null    (1.3 & 1.4)
---
---     2      1         1      1    (2)
---  null      1                     (3)
---                   null      3    (4)
-
-create temporary view t1 as select * from values
-  (1, 1), (2, 1), (null, 1),
-  (1, 3), (null, 3),
-  (1, null), (null, 2)
-as t1(a1, b1);
-
-create temporary view t2 as select * from values
-  (1, 1),
-  (null, 3),
-  (1, null)
-as t2(a2, b2);
-
--- multiple columns in NOT IN
--- TC 01.01
-select a1,b1
-from   t1
-where  (a1,b1) not in (select a2,b2
-                       from   t2);
-
--- multiple columns with expressions in NOT IN
--- TC 01.02
-select a1,b1
-from   t1
-where  (a1-1,b1) not in (select a2,b2
-                         from   t2);
-
--- multiple columns with expressions in NOT IN
--- TC 01.02
-select a1,b1
-from   t1
-where  (a1,b1) not in (select a2+1,b2
-                       from   t2);
-

http://git-wip-us.apache.org/repos/asf/spark/blob/d22db627/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-limit.sql.out
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-limit.sql.out
 
b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-limit.sql.out
new file mode 100644
index 0000000..71ca1f8
--- /dev/null
+++ 
b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-limit.sql.out
@@ -0,0 +1,147 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 8
+
+
+-- !query 0
+create temporary view t1 as select * from values
+  ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 
01:00:00.000', date '2014-04-04'),
+  ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
+  ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 
01:02:00.001', date '2014-06-04'),
+  ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 
01:01:00.000', date '2014-07-04'),
+  ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 
01:02:00.001', date '2014-05-05'),
+  ("val1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', null),
+  ("val1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 
01:02:00.001', null),
+  ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 
01:01:00.000', date '2014-08-04'),
+  ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 
01:02:00.001', date '2014-09-04'),
+  ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 
01:01:00.000', date '2015-05-04'),
+  ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 
01:02:00.001', date '2014-04-04'),
+  ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04')
+  as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+create temporary view t2 as select * from values
+  ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 
01:01:00.000', date '2014-04-04'),
+  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
+  ("val1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 
01:01:00.000', date '2015-05-04'),
+  ("val1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 
01:01:00.000', date '2016-05-04'),
+  ("val1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 
01:01:00.000', null),
+  ("val2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', date '2014-06-04'),
+  ("val1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', date '2014-05-04'),
+  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:01:00.000', date '2014-06-04'),
+  ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 
01:01:00.000', date '2014-07-04'),
+  ("val1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 
01:01:00.000', date '2014-08-05'),
+  ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 
01:01:00.000', date '2014-09-04'),
+  ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 
01:01:00.000', date '2014-10-04'),
+  ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', null)
+  as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i)
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+create temporary view t3 as select * from values
+  ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 
01:02:00.000', date '2014-04-04'),
+  ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
+  ("val1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
+  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
+  ("val1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 
01:02:00.000', date '2014-06-04'),
+  ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 
01:02:00.000', date '2014-07-04'),
+  ("val3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 
01:02:00.000', date '2014-08-04'),
+  ("val3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 
01:02:00.000', date '2014-09-05'),
+  ("val1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 
01:02:00.000', null),
+  ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 
01:02:00.000', null),
+  ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'),
+  ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 
01:02:00.000', date '2015-05-04')
+  as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i)
+-- !query 2 schema
+struct<>
+-- !query 2 output
+
+
+
+-- !query 3
+SELECT *
+FROM   t1
+WHERE  t1a IN (SELECT t2a
+               FROM   t2
+               WHERE  t1d = t2d)
+LIMIT  2
+-- !query 3 schema
+struct<t1a:string,t1b:smallint,t1c:int,t1d:bigint,t1e:float,t1f:double,t1g:decimal(2,-2),t1h:timestamp,t1i:date>
+-- !query 3 output
+val1b  8       16      19      17.0    25.0    2600    2014-05-04 01:01:00     
2014-05-04
+val1c  8       16      19      17.0    25.0    2600    2014-05-04 01:02:00.001 
2014-05-05
+
+
+-- !query 4
+SELECT *
+FROM   t1
+WHERE  t1c IN (SELECT t2c
+               FROM   t2
+               WHERE  t2b >= 8
+               LIMIT  2)
+LIMIT 4
+-- !query 4 schema
+struct<t1a:string,t1b:smallint,t1c:int,t1d:bigint,t1e:float,t1f:double,t1g:decimal(2,-2),t1h:timestamp,t1i:date>
+-- !query 4 output
+val1a  16      12      10      15.0    20.0    2000    2014-07-04 01:01:00     
2014-07-04
+val1a  16      12      21      15.0    20.0    2000    2014-06-04 01:02:00.001 
2014-06-04
+val1b  8       16      19      17.0    25.0    2600    2014-05-04 01:01:00     
2014-05-04
+val1c  8       16      19      17.0    25.0    2600    2014-05-04 01:02:00.001 
2014-05-05
+
+
+-- !query 5
+SELECT Count(DISTINCT( t1a )),
+       t1b
+FROM   t1
+WHERE  t1d IN (SELECT t2d
+               FROM   t2
+               ORDER  BY t2c
+               LIMIT 2)
+GROUP  BY t1b
+ORDER  BY t1b DESC NULLS FIRST
+LIMIT  1
+-- !query 5 schema
+struct<count(DISTINCT t1a):bigint,t1b:smallint>
+-- !query 5 output
+1      NULL
+
+
+-- !query 6
+SELECT *
+FROM   t1
+WHERE  t1b NOT IN (SELECT t2b
+                   FROM   t2
+                   WHERE  t2b > 6
+                   LIMIT  2)
+-- !query 6 schema
+struct<t1a:string,t1b:smallint,t1c:int,t1d:bigint,t1e:float,t1f:double,t1g:decimal(2,-2),t1h:timestamp,t1i:date>
+-- !query 6 output
+val1a  16      12      10      15.0    20.0    2000    2014-07-04 01:01:00     
2014-07-04
+val1a  16      12      21      15.0    20.0    2000    2014-06-04 01:02:00.001 
2014-06-04
+val1a  6       8       10      15.0    20.0    2000    2014-04-04 01:00:00     
2014-04-04
+val1a  6       8       10      15.0    20.0    2000    2014-04-04 01:02:00.001 
2014-04-04
+
+
+-- !query 7
+SELECT Count(DISTINCT( t1a )),
+       t1b
+FROM   t1
+WHERE  t1d NOT IN (SELECT t2d
+                   FROM   t2
+                   ORDER  BY t2b DESC nulls first
+                   LIMIT 1)
+GROUP  BY t1b
+ORDER BY t1b NULLS last
+LIMIT  1
+-- !query 7 schema
+struct<count(DISTINCT t1a):bigint,t1b:smallint>
+-- !query 7 output
+1      6

http://git-wip-us.apache.org/repos/asf/spark/blob/d22db627/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-order-by.sql.out
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-order-by.sql.out
 
b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-order-by.sql.out
new file mode 100644
index 0000000..4bebd96
--- /dev/null
+++ 
b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-order-by.sql.out
@@ -0,0 +1,328 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 18
+
+
+-- !query 0
+create temporary view t1 as select * from values
+  ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'),
+  ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
+  ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'),
+  ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
+  ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'),
+  ("val1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null),
+  ("val1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null),
+  ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'),
+  ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'),
+  ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
+  ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'),
+  ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04')
+  as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+create temporary view t2 as select * from values
+  ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'),
+  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
+  ("val1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
+  ("val1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'),
+  ("val1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null),
+  ("val2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
+  ("val1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
+  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
+  ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
+  ("val1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'),
+  ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'),
+  ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'),
+  ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null)
+  as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i)
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+create temporary view t3 as select * from values
+  ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'),
+  ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+  ("val1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+  ("val1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'),
+  ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'),
+  ("val3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'),
+  ("val3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'),
+  ("val1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null),
+  ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null),
+  ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+  ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04')
+  as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i)
+-- !query 2 schema
+struct<>
+-- !query 2 output
+
+
+
+-- !query 3
+SELECT *
+FROM   t1
+WHERE  t1a IN (SELECT t2a
+               FROM   t2)
+ORDER  BY t1a
+-- !query 3 schema
+struct<t1a:string,t1b:smallint,t1c:int,t1d:bigint,t1e:float,t1f:double,t1g:decimal(2,-2),t1h:timestamp,t1i:date>
+-- !query 3 output
+val1b  8       16      19      17.0    25.0    2600    2014-05-04 01:01:00     2014-05-04
+val1c  8       16      19      17.0    25.0    2600    2014-05-04 01:02:00.001 2014-05-05
+val1e  10      NULL    25      17.0    25.0    2600    2014-08-04 01:01:00     2014-08-04
+val1e  10      NULL    19      17.0    25.0    2600    2014-09-04 01:02:00.001 2014-09-04
+val1e  10      NULL    19      17.0    25.0    2600    2014-05-04 01:01:00     2014-05-04
+
+
+-- !query 4
+SELECT t1a
+FROM   t1
+WHERE  t1b IN (SELECT t2b
+               FROM   t2
+               WHERE  t1a = t2a)
+ORDER  BY t1b DESC
+-- !query 4 schema
+struct<t1a:string>
+-- !query 4 output
+val1b
+
+
+-- !query 5
+SELECT t1a,
+       t1b
+FROM   t1
+WHERE  t1c IN (SELECT t2c
+               FROM   t2
+               WHERE  t1a = t2a)
+ORDER  BY 2 DESC nulls last
+-- !query 5 schema
+struct<t1a:string,t1b:smallint>
+-- !query 5 output
+val1b  8
+val1c  8
+
+
+-- !query 6
+SELECT Count(DISTINCT( t1a ))
+FROM   t1
+WHERE  t1b IN (SELECT t2b
+               FROM   t2
+               WHERE  t1a = t2a)
+ORDER  BY Count(DISTINCT( t1a ))
+-- !query 6 schema
+struct<count(DISTINCT t1a):bigint>
+-- !query 6 output
+1
+
+
+-- !query 7
+SELECT *
+FROM   t1
+WHERE  t1b IN (SELECT t2c
+               FROM   t2
+               ORDER  BY t2d)
+-- !query 7 schema
+struct<t1a:string,t1b:smallint,t1c:int,t1d:bigint,t1e:float,t1f:double,t1g:decimal(2,-2),t1h:timestamp,t1i:date>
+-- !query 7 output
+val1a  16      12      10      15.0    20.0    2000    2014-07-04 01:01:00     2014-07-04
+val1a  16      12      21      15.0    20.0    2000    2014-06-04 01:02:00.001 2014-06-04
+
+
+-- !query 8
+SELECT *
+FROM   t1
+WHERE  t1b IN (SELECT Min(t2b)
+               FROM   t2
+               WHERE  t1b = t2b
+               ORDER  BY Min(t2b))
+ORDER BY t1c DESC nulls first
+-- !query 8 schema
+struct<t1a:string,t1b:smallint,t1c:int,t1d:bigint,t1e:float,t1f:double,t1g:decimal(2,-2),t1h:timestamp,t1i:date>
+-- !query 8 output
+val1e  10      NULL    25      17.0    25.0    2600    2014-08-04 01:01:00     2014-08-04
+val1e  10      NULL    19      17.0    25.0    2600    2014-09-04 01:02:00.001 2014-09-04
+val1d  10      NULL    12      17.0    25.0    2600    2015-05-04 01:01:00     2015-05-04
+val1e  10      NULL    19      17.0    25.0    2600    2014-05-04 01:01:00     2014-05-04
+val1b  8       16      19      17.0    25.0    2600    2014-05-04 01:01:00     2014-05-04
+val1c  8       16      19      17.0    25.0    2600    2014-05-04 01:02:00.001 2014-05-05
+val1a  6       8       10      15.0    20.0    2000    2014-04-04 01:00:00     2014-04-04
+val1a  6       8       10      15.0    20.0    2000    2014-04-04 01:02:00.001 2014-04-04
+
+
+-- !query 9
+SELECT t1a,
+       t1b,
+       t1h
+FROM   t1
+WHERE  t1c IN (SELECT t2c
+               FROM   t2
+               WHERE  t1a = t2a
+               ORDER  BY t2b DESC nulls first)
+        OR t1h IN (SELECT t2h
+                   FROM   t2
+                   WHERE  t1h > t2h)
+ORDER  BY t1h DESC nulls last
+-- !query 9 schema
+struct<t1a:string,t1b:smallint,t1h:timestamp>
+-- !query 9 output
+val1c  8       2014-05-04 01:02:00.001
+val1b  8       2014-05-04 01:01:00
+
+
+-- !query 10
+SELECT *
+FROM   t1
+WHERE  t1a NOT IN (SELECT t2a
+                   FROM   t2)
+ORDER  BY t1a
+-- !query 10 schema
+struct<t1a:string,t1b:smallint,t1c:int,t1d:bigint,t1e:float,t1f:double,t1g:decimal(2,-2),t1h:timestamp,t1i:date>
+-- !query 10 output
+val1a  6       8       10      15.0    20.0    2000    2014-04-04 01:00:00     2014-04-04
+val1a  16      12      21      15.0    20.0    2000    2014-06-04 01:02:00.001 2014-06-04
+val1a  16      12      10      15.0    20.0    2000    2014-07-04 01:01:00     2014-07-04
+val1a  6       8       10      15.0    20.0    2000    2014-04-04 01:02:00.001 2014-04-04
+val1d  NULL    16      22      17.0    25.0    2600    2014-06-04 01:01:00     NULL
+val1d  NULL    16      19      17.0    25.0    2600    2014-07-04 01:02:00.001 NULL
+val1d  10      NULL    12      17.0    25.0    2600    2015-05-04 01:01:00     2015-05-04
+
+
+-- !query 11
+SELECT t1a,
+       t1b
+FROM   t1
+WHERE  t1a NOT IN (SELECT t2a
+                   FROM   t2
+                   WHERE  t1a = t2a)
+ORDER  BY t1b DESC nulls last
+-- !query 11 schema
+struct<t1a:string,t1b:smallint>
+-- !query 11 output
+val1a  16
+val1a  16
+val1d  10
+val1a  6
+val1a  6
+val1d  NULL
+val1d  NULL
+
+
+-- !query 12
+SELECT *
+FROM   t1
+WHERE  t1a NOT IN (SELECT t2a
+                   FROM   t2
+                   ORDER  BY t2a DESC nulls first)
+       and t1c IN (SELECT t2c
+                   FROM   t2
+                   ORDER  BY t2b DESC nulls last)
+ORDER  BY t1c DESC nulls last
+-- !query 12 schema
+struct<t1a:string,t1b:smallint,t1c:int,t1d:bigint,t1e:float,t1f:double,t1g:decimal(2,-2),t1h:timestamp,t1i:date>
+-- !query 12 output
+val1d  NULL    16      22      17.0    25.0    2600    2014-06-04 01:01:00     NULL
+val1d  NULL    16      19      17.0    25.0    2600    2014-07-04 01:02:00.001 NULL
+val1a  16      12      21      15.0    20.0    2000    2014-06-04 01:02:00.001 2014-06-04
+val1a  16      12      10      15.0    20.0    2000    2014-07-04 01:01:00     2014-07-04
+
+
+-- !query 13
+SELECT *
+FROM   t1
+WHERE  t1b IN (SELECT Min(t2b)
+               FROM   t2
+               GROUP  BY t2a
+               ORDER  BY t2a DESC)
+-- !query 13 schema
+struct<t1a:string,t1b:smallint,t1c:int,t1d:bigint,t1e:float,t1f:double,t1g:decimal(2,-2),t1h:timestamp,t1i:date>
+-- !query 13 output
+val1a  6       8       10      15.0    20.0    2000    2014-04-04 01:00:00     2014-04-04
+val1a  6       8       10      15.0    20.0    2000    2014-04-04 01:02:00.001 2014-04-04
+val1b  8       16      19      17.0    25.0    2600    2014-05-04 01:01:00     2014-05-04
+val1c  8       16      19      17.0    25.0    2600    2014-05-04 01:02:00.001 2014-05-05
+
+
+-- !query 14
+SELECT t1a,
+       Count(DISTINCT( t1b ))
+FROM   t1
+WHERE  t1b IN (SELECT Min(t2b)
+               FROM   t2
+               WHERE  t1a = t2a
+               GROUP  BY t2a
+               ORDER  BY t2a)
+GROUP  BY t1a,
+          t1h
+ORDER BY t1a
+-- !query 14 schema
+struct<t1a:string,count(DISTINCT t1b):bigint>
+-- !query 14 output
+val1b  1
+
+
+-- !query 15
+SELECT *
+FROM   t1
+WHERE  t1b NOT IN (SELECT Min(t2b)
+                   FROM   t2
+                   GROUP  BY t2a
+                   ORDER  BY t2a)
+-- !query 15 schema
+struct<t1a:string,t1b:smallint,t1c:int,t1d:bigint,t1e:float,t1f:double,t1g:decimal(2,-2),t1h:timestamp,t1i:date>
+-- !query 15 output
+val1a  16      12      10      15.0    20.0    2000    2014-07-04 01:01:00     2014-07-04
+val1a  16      12      21      15.0    20.0    2000    2014-06-04 01:02:00.001 2014-06-04
+val1d  10      NULL    12      17.0    25.0    2600    2015-05-04 01:01:00     2015-05-04
+val1e  10      NULL    19      17.0    25.0    2600    2014-05-04 01:01:00     2014-05-04
+val1e  10      NULL    19      17.0    25.0    2600    2014-09-04 01:02:00.001 2014-09-04
+val1e  10      NULL    25      17.0    25.0    2600    2014-08-04 01:01:00     2014-08-04
+
+
+-- !query 16
+SELECT t1a,
+       Sum(DISTINCT( t1b ))
+FROM   t1
+WHERE  t1b NOT IN (SELECT Min(t2b)
+                   FROM   t2
+                   WHERE  t1a = t2a
+                   GROUP  BY t2c
+                   ORDER  BY t2c DESC nulls last)
+GROUP  BY t1a
+-- !query 16 schema
+struct<t1a:string,sum(DISTINCT t1b):bigint>
+-- !query 16 output
+val1a  22
+val1c  8
+val1d  10
+val1e  10
+
+
+-- !query 17
+SELECT Count(DISTINCT( t1a )),
+       t1b
+FROM   t1
+WHERE  t1h NOT IN (SELECT t2h
+                   FROM   t2
+                   where t1a = t2a
+                   order by t2d DESC nulls first
+                   )
+GROUP  BY t1a,
+          t1b
+ORDER  BY t1b DESC nulls last
+-- !query 17 schema
+struct<count(DISTINCT t1a):bigint,t1b:smallint>
+-- !query 17 output
+1      16
+1      10
+1      10
+1      8
+1      6
+1      NULL

http://git-wip-us.apache.org/repos/asf/spark/blob/d22db627/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-group-by.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-group-by.sql.out
new file mode 100644
index 0000000..6b86a9f
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-group-by.sql.out
@@ -0,0 +1,150 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 8
+
+
+-- !query 0
+create temporary view t1 as select * from values
+  ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'),
+  ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
+  ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'),
+  ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
+  ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'),
+  ("val1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null),
+  ("val1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null),
+  ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'),
+  ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'),
+  ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
+  ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'),
+  ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04')
+  as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+create temporary view t2 as select * from values
+  ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'),
+  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
+  ("val1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'),
+  ("val1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'),
+  ("val1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null),
+  ("val2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
+  ("val1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'),
+  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'),
+  ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'),
+  ("val1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'),
+  ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'),
+  ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'),
+  ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null)
+  as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i)
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+create temporary view t3 as select * from values
+  ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'),
+  ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+  ("val1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+  ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+  ("val1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'),
+  ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'),
+  ("val3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'),
+  ("val3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'),
+  ("val1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null),
+  ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null),
+  ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'),
+  ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04')
+  as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i)
+-- !query 2 schema
+struct<>
+-- !query 2 output
+
+
+
+-- !query 3
+SELECT t1a,
+       Avg(t1b)
+FROM   t1
+WHERE  t1a NOT IN (SELECT t2a
+                   FROM   t2)
+GROUP  BY t1a
+-- !query 3 schema
+struct<t1a:string,avg(t1b):double>
+-- !query 3 output
+val1a  11.0
+val1d  10.0
+
+
+-- !query 4
+SELECT t1a,
+       Sum(DISTINCT( t1b ))
+FROM   t1
+WHERE  t1d NOT IN (SELECT t2d
+                   FROM   t2
+                   WHERE  t1h < t2h)
+GROUP  BY t1a
+-- !query 4 schema
+struct<t1a:string,sum(DISTINCT t1b):bigint>
+-- !query 4 output
+val1a  22
+val1d  10
+val1e  10
+
+
+-- !query 5
+SELECT Count(*)
+FROM   (SELECT *
+        FROM   t2
+        WHERE  t2a NOT IN (SELECT t3a
+                           FROM   t3
+                           WHERE  t3h != t2h)) t2
+WHERE  t2b NOT IN (SELECT Min(t2b)
+                   FROM   t2
+                   WHERE  t2b = t2b
+                   GROUP  BY t2c)
+-- !query 5 schema
+struct<count(1):bigint>
+-- !query 5 output
+4
+
+
+-- !query 6
+SELECT t1a,
+       max(t1b)
+FROM   t1
+WHERE  t1c NOT IN (SELECT Max(t2b)
+                   FROM   t2
+                   WHERE  t1a = t2a
+                   GROUP  BY t2a)
+GROUP BY t1a
+-- !query 6 schema
+struct<t1a:string,max(t1b):smallint>
+-- !query 6 output
+val1a  16
+val1b  8
+val1c  8
+val1d  10
+
+
+-- !query 7
+SELECT t1a,
+       t1b
+FROM   t1
+WHERE  t1c IN (SELECT t2b
+               FROM   t2
+               WHERE  t2a NOT IN (SELECT Min(t3a)
+                                  FROM   t3
+                                  WHERE  t3a = t2a
+                                  GROUP  BY t3b) order by t2a)
+-- !query 7 schema
+struct<t1a:string,t1b:smallint>
+-- !query 7 output
+val1a  16
+val1a  16
+val1a  6
+val1a  6

http://git-wip-us.apache.org/repos/asf/spark/blob/d22db627/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-multiple-columns.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-multiple-columns.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-multiple-columns.sql.out
deleted file mode 100644
index 756c378..0000000
--- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-multiple-columns.sql.out
+++ /dev/null
@@ -1,59 +0,0 @@
--- Automatically generated by SQLQueryTestSuite
--- Number of queries: 5
-
-
--- !query 0
-create temporary view t1 as select * from values
-  (1, 1), (2, 1), (null, 1),
-  (1, 3), (null, 3),
-  (1, null), (null, 2)
-as t1(a1, b1)
--- !query 0 schema
-struct<>
--- !query 0 output
-
-
-
--- !query 1
-create temporary view t2 as select * from values
-  (1, 1),
-  (null, 3),
-  (1, null)
-as t2(a2, b2)
--- !query 1 schema
-struct<>
--- !query 1 output
-
-
-
--- !query 2
-select a1,b1
-from   t1
-where  (a1,b1) not in (select a2,b2
-                       from   t2)
--- !query 2 schema
-struct<a1:int,b1:int>
--- !query 2 output
-2      1
-
-
--- !query 3
-select a1,b1
-from   t1
-where  (a1-1,b1) not in (select a2,b2
-                         from   t2)
--- !query 3 schema
-struct<a1:int,b1:int>
--- !query 3 output
-1      1
-
-
--- !query 4
-select a1,b1
-from   t1
-where  (a1,b1) not in (select a2+1,b2
-                       from   t2)
--- !query 4 schema
-struct<a1:int,b1:int>
--- !query 4 output
-1      1


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to