Github user nsyca commented on a diff in the pull request:
https://github.com/apache/spark/pull/16760#discussion_r98794587
--- Diff:
sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-aggregate.sql.out
---
@@ -0,0 +1,183 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 11
+
+
+-- !query 0
+CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES
+ (100, "emp 1", date "2005-01-01", 100.00D, 10),
+ (100, "emp 1", date "2005-01-01", 100.00D, 10),
+ (200, "emp 2", date "2003-01-01", 200.00D, 10),
+ (300, "emp 3", date "2002-01-01", 300.00D, 20),
+ (400, "emp 4", date "2005-01-01", 400.00D, 30),
+ (500, "emp 5", date "2001-01-01", 400.00D, NULL),
+ (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100),
+ (700, "emp 7", date "2010-01-01", 400.00D, 100),
+ (800, "emp 8", date "2016-01-01", 150.00D, 70)
+AS EMP(id, emp_name, hiredate, salary, dept_id)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES
+ (10, "dept 1", "CA"),
+ (20, "dept 2", "NY"),
+ (30, "dept 3", "TX"),
+ (40, "dept 4 - unassigned", "OR"),
+ (50, "dept 5 - unassigned", "NJ"),
+ (70, "dept 7", "FL")
+AS DEPT(dept_id, dept_name, state)
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES
+ ("emp 1", 10.00D),
+ ("emp 1", 20.00D),
+ ("emp 2", 300.00D),
+ ("emp 2", 100.00D),
+ ("emp 3", 300.00D),
+ ("emp 4", 100.00D),
+ ("emp 5", 1000.00D),
+ ("emp 6 - no dept", 500.00D)
+AS BONUS(emp_name, bonus_amt)
+-- !query 2 schema
+struct<>
+-- !query 2 output
+
+
+
+-- !query 3
+SELECT emp.dept_id,
+ avg(salary),
+ sum(salary)
+FROM emp
+WHERE EXISTS (SELECT state
+ FROM dept
+ WHERE dept.dept_id = emp.dept_id)
+GROUP BY dept_id
+-- !query 3 schema
+struct<dept_id:int,avg(salary):double,sum(salary):double>
+-- !query 3 output
+10 133.33333333333334 400.0
+20 300.0 300.0
+30 400.0 400.0
+70 150.0 150.0
+
+
+-- !query 4
+SELECT emp_name
+FROM emp
+WHERE EXISTS (SELECT max(dept.dept_id) a
+ FROM dept
+ WHERE dept.dept_id = emp.dept_id
+ GROUP BY dept.dept_id)
+-- !query 4 schema
+struct<emp_name:string>
+-- !query 4 output
+emp 1
+emp 1
+emp 2
+emp 3
+emp 4
+emp 8
+
+
+-- !query 5
+SELECT count(*)
+FROM emp
+WHERE EXISTS (SELECT max(dept.dept_id) a
+ FROM dept
+ WHERE dept.dept_id = emp.dept_id
+ GROUP BY dept.dept_id)
+-- !query 5 schema
+struct<count(1):bigint>
+-- !query 5 output
+6
+
+
+-- !query 6
+SELECT *
+FROM bonus
+WHERE EXISTS (SELECT 1
+ FROM emp
+ WHERE emp.emp_name = bonus.emp_name
+ AND EXISTS (SELECT max(dept.dept_id)
+ FROM dept
+ WHERE emp.dept_id = dept.dept_id
+ GROUP BY dept.dept_id))
+-- !query 6 schema
+struct<emp_name:string,bonus_amt:double>
+-- !query 6 output
+emp 1 10.0
+emp 1 20.0
+emp 2 100.0
+emp 2 300.0
+emp 3 300.0
+emp 4 100.0
+
+
+-- !query 7
+SELECT emp.dept_id,
+ Avg(salary),
+ Sum(salary)
+FROM emp
+WHERE NOT EXISTS (SELECT state
+ FROM dept
+ WHERE dept.dept_id = emp.dept_id)
+GROUP BY dept_id
+-- !query 7 schema
+struct<dept_id:int,avg(salary):double,sum(salary):double>
+-- !query 7 output
+100 400.0 800.0
+NULL 400.0 400.0
+
+
+-- !query 8
+SELECT emp_name
+FROM emp
+WHERE NOT EXISTS (SELECT max(dept.dept_id) a
+ FROM dept
+ WHERE dept.dept_id = emp.dept_id
+ GROUP BY dept.dept_id)
+-- !query 8 schema
+struct<emp_name:string>
+-- !query 8 output
+emp 5
+emp 6 - no dept
+emp 7
+
+
+-- !query 9
+SELECT count(*)
+FROM emp
+WHERE NOT EXISTS (SELECT max(dept.dept_id) a
+ FROM dept
+ WHERE dept.dept_id = emp.dept_id
+ GROUP BY dept.dept_id)
+-- !query 9 schema
+struct<count(1):bigint>
+-- !query 9 output
+3
+
+
+-- !query 10
+SELECT *
+FROM bonus
+WHERE NOT EXISTS (SELECT 1
+ FROM emp
+ WHERE emp.emp_name = bonus.emp_name
+ AND EXISTS (SELECT Max(dept.dept_id)
+ FROM dept
+ WHERE emp.dept_id = dept.dept_id
+ GROUP BY dept.dept_id))
+-- !query 10 schema
+struct<emp_name:string,bonus_amt:double>
+-- !query 10 output
+emp 5 1000.0
+emp 6 - no dept 500.0
--- End diff --
I have compared the result set matched with the result from DB2.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]