[ https://issues.apache.org/jira/browse/IMPALA-14200?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
weihua zhang updated IMPALA-14200: ---------------------------------- Description: {code:sql} CREATE TABLE dept( deptno INT, dname string, loc string ); CREATE TABLE emp( empno INT , ename string, job string, age INT, mgr INT, hiredate DATE, sal double, comm double, deptno INT, email string, create_datetime TIMESTAMP, upsert_time TIMESTAMP ); INSERT INTO DEPT VALUES (10, 'ACCOUNTING', 'NEW YORK'); INSERT INTO DEPT VALUES (20, 'RESEARCH', 'DALLAS'); INSERT INTO DEPT VALUES (30, 'SALES', 'CHICAGO'); INSERT INTO DEPT VALUES (40, 'OPERATIONS', 'BOSTON'); INSERT INTO EMP VALUES (7369, 'SMITH', 'CLERK', 30, 7902, '1980-12-17', 800, NULL, 20, 'smith@calcite', '2020-01-01 18:35:40', '2020-01-01 18:35:40'); INSERT INTO EMP VALUES (7499, 'ALLEN', 'SALESMAN', 24, 7698, '1981-02-20', 1600, 300, 30, 'allen@calcite', '2018-04-09 09:00:00', '2018-04-09 09:00:00'); INSERT INTO EMP VALUES (7521, 'WARD', 'SALESMAN', 41, 7698, '1981-02-22', 1250, 500, 30, 'ward@calcite', '2019-11-16 10:26:40', '2019-11-16 10:26:40'); INSERT INTO EMP VALUES (7566, 'JONES', 'MANAGER', 28, 7839, '1981-02-04', 2975, NULL, 20, 'jones@calcite', '2015-03-09 22:16:30', '2015-03-09 22:16:30'); INSERT INTO EMP VALUES (7654, 'MARTIN', 'SALESMAN', 27, 7698, '1981-09-28', 1250, 1400, 30, 'martin@calcite', '2018-09-02 12:12:56', '2018-09-02 12:12:56'); INSERT INTO EMP VALUES (7698, 'BLAKE', 'MANAGER', 38, 7839, '1981-01-05', 2850, NULL, 30, 'blake@calcite', '2018-06-01 14:45:00', '2018-06-01 14:45:00'); INSERT INTO EMP VALUES (7782, 'CLARK', 'MANAGER', 32, 7839, '1981-06-09', 2450, NULL, 10, NULL, '2019-09-30 02:14:56', '2019-09-30 02:14:56'); INSERT INTO EMP VALUES (7788, 'SCOTT', 'ANALYST', 45, 7566, '1987-04-19', 3000, NULL, 20, 'scott@calcite', '2019-07-28 12:12:12', '2019-07-28 12:12:12'); INSERT INTO EMP VALUES (7839, 'KING', 'PRESIDENT', 22, NULL, '1981-11-17', 5000, NULL, 10, 'king@calcite', '2019-06-08 15:15:15', '2019-06-08 15:15:15'); INSERT INTO EMP VALUES (7844, 'TURNER', 'SALESMAN', 54, 7698, '1981-09-08', 1500, 0, 30, 'turner@calcite', '2017-08-17 22:01:37', '2017-08-17 22:01:37'); INSERT INTO EMP VALUES (7876, 'ADAMS', 'CLERK', 35, 7788, '1987-05-23', 1100, NULL, 20, 'adams@calcite', NULL, '2017-08-18 23:11:06'); INSERT INTO EMP VALUES (7900, 'JAMES', 'CLERK', 40, 7698, '1981-12-03', 950, NULL, 30, 'james@calcite', '2020-01-02 12:19:00', '2020-01-02 12:19:00'); INSERT INTO EMP VALUES (7902, 'FORD', 'ANALYST', 28, 7566, '1981-12-03', 3000, NULL, 20, 'ford@calcite', '2019-05-29 00:00:00', '2019-05-29 00:00:00'); INSERT INTO EMP VALUES (7934, 'MILLER', 'CLERK', 32, 7782, '1982-01-23', 1300, NULL, 10, NULL, '2016-09-02 23:15:01', '2016-09-02 23:15:01'); {code} {code:sql} select count(*) filter (where empno in (select deptno from emp)) from emp; {code} maybe equal to: {code:sql} SELECT SUM(CASE WHEN empno IN (SELECT deptno FROM emp) THEN 1 ELSE 0 END) AS count_matching FROM emp; {code} {noformat} LogicalAggregate(group=[{}], EXPR$0=[COUNT() FILTER $0]), id = 7123 LogicalProject($f0=[AND(IS NOT NULL($13), IS NOT NULL($0))]), id = 7122 LogicalJoin(condition=[=($0, $12)], joinType=[left]), id = 7111 LogicalTableScan(table=[[db1, emp]]), id = 7050 LogicalProject(DEPTNO=[$0], i=[true]), id = 7120 LogicalAggregate(group=[{0}]), id = 7117 LogicalProject(DEPTNO=[$8]), id = 7107 LogicalTableScan(table=[[db1, emp]]), id = 7050 {noformat} was: {code:sql} CREATE TABLE dept( deptno INT, dname string, loc string ); CREATE TABLE emp( empno INT , ename string, job string, age INT, mgr INT, hiredate DATE, sal double, comm double, deptno INT, email string, create_datetime TIMESTAMP, upsert_time TIMESTAMP ); INSERT INTO DEPT VALUES (10, 'ACCOUNTING', 'NEW YORK'); INSERT INTO DEPT VALUES (20, 'RESEARCH', 'DALLAS'); INSERT INTO DEPT VALUES (30, 'SALES', 'CHICAGO'); INSERT INTO DEPT VALUES (40, 'OPERATIONS', 'BOSTON'); INSERT INTO EMP VALUES (7369, 'SMITH', 'CLERK', 30, 7902, '1980-12-17', 800, NULL, 20, 'smith@calcite', '2020-01-01 18:35:40', '2020-01-01 18:35:40'); INSERT INTO EMP VALUES (7499, 'ALLEN', 'SALESMAN', 24, 7698, '1981-02-20', 1600, 300, 30, 'allen@calcite', '2018-04-09 09:00:00', '2018-04-09 09:00:00'); INSERT INTO EMP VALUES (7521, 'WARD', 'SALESMAN', 41, 7698, '1981-02-22', 1250, 500, 30, 'ward@calcite', '2019-11-16 10:26:40', '2019-11-16 10:26:40'); INSERT INTO EMP VALUES (7566, 'JONES', 'MANAGER', 28, 7839, '1981-02-04', 2975, NULL, 20, 'jones@calcite', '2015-03-09 22:16:30', '2015-03-09 22:16:30'); INSERT INTO EMP VALUES (7654, 'MARTIN', 'SALESMAN', 27, 7698, '1981-09-28', 1250, 1400, 30, 'martin@calcite', '2018-09-02 12:12:56', '2018-09-02 12:12:56'); INSERT INTO EMP VALUES (7698, 'BLAKE', 'MANAGER', 38, 7839, '1981-01-05', 2850, NULL, 30, 'blake@calcite', '2018-06-01 14:45:00', '2018-06-01 14:45:00'); INSERT INTO EMP VALUES (7782, 'CLARK', 'MANAGER', 32, 7839, '1981-06-09', 2450, NULL, 10, NULL, '2019-09-30 02:14:56', '2019-09-30 02:14:56'); INSERT INTO EMP VALUES (7788, 'SCOTT', 'ANALYST', 45, 7566, '1987-04-19', 3000, NULL, 20, 'scott@calcite', '2019-07-28 12:12:12', '2019-07-28 12:12:12'); INSERT INTO EMP VALUES (7839, 'KING', 'PRESIDENT', 22, NULL, '1981-11-17', 5000, NULL, 10, 'king@calcite', '2019-06-08 15:15:15', '2019-06-08 15:15:15'); INSERT INTO EMP VALUES (7844, 'TURNER', 'SALESMAN', 54, 7698, '1981-09-08', 1500, 0, 30, 'turner@calcite', '2017-08-17 22:01:37', '2017-08-17 22:01:37'); INSERT INTO EMP VALUES (7876, 'ADAMS', 'CLERK', 35, 7788, '1987-05-23', 1100, NULL, 20, 'adams@calcite', NULL, '2017-08-18 23:11:06'); INSERT INTO EMP VALUES (7900, 'JAMES', 'CLERK', 40, 7698, '1981-12-03', 950, NULL, 30, 'james@calcite', '2020-01-02 12:19:00', '2020-01-02 12:19:00'); INSERT INTO EMP VALUES (7902, 'FORD', 'ANALYST', 28, 7566, '1981-12-03', 3000, NULL, 20, 'ford@calcite', '2019-05-29 00:00:00', '2019-05-29 00:00:00'); INSERT INTO EMP VALUES (7934, 'MILLER', 'CLERK', 32, 7782, '1982-01-23', 1300, NULL, 10, NULL, '2016-09-02 23:15:01', '2016-09-02 23:15:01'); select count(*) filter (where empno in (select deptno from emp)) from emp; {code} {noformat} LogicalAggregate(group=[{}], EXPR$0=[COUNT() FILTER $0]), id = 7123 LogicalProject($f0=[AND(IS NOT NULL($13), IS NOT NULL($0))]), id = 7122 LogicalJoin(condition=[=($0, $12)], joinType=[left]), id = 7111 LogicalTableScan(table=[[db1, emp]]), id = 7050 LogicalProject(DEPTNO=[$0], i=[true]), id = 7120 LogicalAggregate(group=[{0}]), id = 7117 LogicalProject(DEPTNO=[$8]), id = 7107 LogicalTableScan(table=[[db1, emp]]), id = 7050 {noformat} > Calcite Planner: Impala needs to implement a converter that transforms > COUNT() FILTER into a form that Impala can handle. > ------------------------------------------------------------------------------------------------------------------------- > > Key: IMPALA-14200 > URL: https://issues.apache.org/jira/browse/IMPALA-14200 > Project: IMPALA > Issue Type: Sub-task > Reporter: weihua zhang > Priority: Major > > {code:sql} > CREATE TABLE dept( > deptno INT, > dname string, > loc string > ); > CREATE TABLE emp( > empno INT , > ename string, > job string, > age INT, > mgr INT, > hiredate DATE, > sal double, > comm double, > deptno INT, > email string, > create_datetime TIMESTAMP, > upsert_time TIMESTAMP > ); > INSERT INTO DEPT VALUES (10, 'ACCOUNTING', 'NEW YORK'); > INSERT INTO DEPT VALUES (20, 'RESEARCH', 'DALLAS'); > INSERT INTO DEPT VALUES (30, 'SALES', 'CHICAGO'); > INSERT INTO DEPT VALUES (40, 'OPERATIONS', 'BOSTON'); > INSERT INTO EMP VALUES (7369, 'SMITH', 'CLERK', 30, 7902, '1980-12-17', 800, > NULL, 20, 'smith@calcite', '2020-01-01 18:35:40', '2020-01-01 18:35:40'); > INSERT INTO EMP VALUES (7499, 'ALLEN', 'SALESMAN', 24, 7698, '1981-02-20', > 1600, 300, 30, 'allen@calcite', '2018-04-09 09:00:00', '2018-04-09 > 09:00:00'); > INSERT INTO EMP VALUES (7521, 'WARD', 'SALESMAN', 41, 7698, '1981-02-22', > 1250, 500, 30, 'ward@calcite', '2019-11-16 10:26:40', '2019-11-16 10:26:40'); > INSERT INTO EMP VALUES (7566, 'JONES', 'MANAGER', 28, 7839, '1981-02-04', > 2975, NULL, 20, 'jones@calcite', '2015-03-09 22:16:30', '2015-03-09 > 22:16:30'); > INSERT INTO EMP VALUES (7654, 'MARTIN', 'SALESMAN', 27, 7698, '1981-09-28', > 1250, 1400, 30, 'martin@calcite', '2018-09-02 12:12:56', '2018-09-02 > 12:12:56'); > INSERT INTO EMP VALUES (7698, 'BLAKE', 'MANAGER', 38, 7839, '1981-01-05', > 2850, NULL, 30, 'blake@calcite', '2018-06-01 14:45:00', '2018-06-01 > 14:45:00'); > INSERT INTO EMP VALUES (7782, 'CLARK', 'MANAGER', 32, 7839, '1981-06-09', > 2450, NULL, 10, NULL, '2019-09-30 02:14:56', '2019-09-30 02:14:56'); > INSERT INTO EMP VALUES (7788, 'SCOTT', 'ANALYST', 45, 7566, '1987-04-19', > 3000, NULL, 20, 'scott@calcite', '2019-07-28 12:12:12', '2019-07-28 > 12:12:12'); > INSERT INTO EMP VALUES (7839, 'KING', 'PRESIDENT', 22, NULL, '1981-11-17', > 5000, NULL, 10, 'king@calcite', '2019-06-08 15:15:15', '2019-06-08 > 15:15:15'); > INSERT INTO EMP VALUES (7844, 'TURNER', 'SALESMAN', 54, 7698, '1981-09-08', > 1500, 0, 30, 'turner@calcite', '2017-08-17 22:01:37', '2017-08-17 22:01:37'); > INSERT INTO EMP VALUES (7876, 'ADAMS', 'CLERK', 35, 7788, '1987-05-23', 1100, > NULL, 20, 'adams@calcite', NULL, '2017-08-18 23:11:06'); > INSERT INTO EMP VALUES (7900, 'JAMES', 'CLERK', 40, 7698, '1981-12-03', 950, > NULL, 30, 'james@calcite', '2020-01-02 12:19:00', '2020-01-02 12:19:00'); > INSERT INTO EMP VALUES (7902, 'FORD', 'ANALYST', 28, 7566, '1981-12-03', > 3000, NULL, 20, 'ford@calcite', '2019-05-29 00:00:00', '2019-05-29 > 00:00:00'); > INSERT INTO EMP VALUES (7934, 'MILLER', 'CLERK', 32, 7782, '1982-01-23', > 1300, NULL, 10, NULL, '2016-09-02 23:15:01', '2016-09-02 23:15:01'); > {code} > {code:sql} > select > count(*) filter (where empno in (select deptno from emp)) > from emp; > {code} > maybe equal to: > {code:sql} > SELECT > SUM(CASE WHEN empno IN (SELECT deptno FROM emp) THEN 1 ELSE 0 END) AS > count_matching > FROM emp; > {code} > {noformat} > LogicalAggregate(group=[{}], EXPR$0=[COUNT() FILTER $0]), id = 7123 > LogicalProject($f0=[AND(IS NOT NULL($13), IS NOT NULL($0))]), id = 7122 > LogicalJoin(condition=[=($0, $12)], joinType=[left]), id = 7111 > LogicalTableScan(table=[[db1, emp]]), id = 7050 > LogicalProject(DEPTNO=[$0], i=[true]), id = 7120 > LogicalAggregate(group=[{0}]), id = 7117 > LogicalProject(DEPTNO=[$8]), id = 7107 > LogicalTableScan(table=[[db1, emp]]), id = 7050 > {noformat} -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-all-unsubscr...@impala.apache.org For additional commands, e-mail: issues-all-h...@impala.apache.org