[ 
https://issues.apache.org/jira/browse/IMPALA-14200?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

weihua zhang updated IMPALA-14200:
----------------------------------
    Description: 
{code:sql}
CREATE TABLE dept(
    deptno INT,    
    dname string,
    loc string
);

CREATE TABLE emp(
    empno INT ,
    ename string,
    job string,
    age INT,
    mgr INT,
    hiredate DATE,
    sal double,    
    comm double,
    deptno INT,
    email string,
    create_datetime TIMESTAMP,  
    upsert_time TIMESTAMP
);

INSERT INTO DEPT VALUES (10, 'ACCOUNTING', 'NEW YORK');
INSERT INTO DEPT VALUES (20, 'RESEARCH', 'DALLAS');
INSERT INTO DEPT VALUES (30, 'SALES', 'CHICAGO');
INSERT INTO DEPT VALUES (40, 'OPERATIONS', 'BOSTON');

INSERT INTO EMP VALUES (7369, 'SMITH', 'CLERK', 30, 7902, '1980-12-17', 800, 
NULL, 20, 'smith@calcite', '2020-01-01 18:35:40',  '2020-01-01 18:35:40');
INSERT INTO EMP VALUES (7499, 'ALLEN', 'SALESMAN', 24, 7698, '1981-02-20', 
1600, 300, 30, 'allen@calcite', '2018-04-09 09:00:00',  '2018-04-09 09:00:00');
INSERT INTO EMP VALUES (7521, 'WARD', 'SALESMAN', 41, 7698, '1981-02-22', 1250, 
500, 30, 'ward@calcite', '2019-11-16 10:26:40',  '2019-11-16 10:26:40');
INSERT INTO EMP VALUES (7566, 'JONES', 'MANAGER', 28, 7839, '1981-02-04', 2975, 
NULL, 20, 'jones@calcite', '2015-03-09 22:16:30',  '2015-03-09 22:16:30');
INSERT INTO EMP VALUES (7654, 'MARTIN', 'SALESMAN', 27, 7698, '1981-09-28', 
1250, 1400, 30, 'martin@calcite', '2018-09-02 12:12:56',  '2018-09-02 
12:12:56');
INSERT INTO EMP VALUES (7698, 'BLAKE', 'MANAGER', 38, 7839, '1981-01-05', 2850, 
NULL, 30, 'blake@calcite', '2018-06-01 14:45:00',  '2018-06-01 14:45:00');
INSERT INTO EMP VALUES (7782, 'CLARK', 'MANAGER', 32, 7839, '1981-06-09', 2450, 
NULL, 10, NULL, '2019-09-30 02:14:56',  '2019-09-30 02:14:56');
INSERT INTO EMP VALUES (7788, 'SCOTT', 'ANALYST', 45, 7566, '1987-04-19', 3000, 
NULL, 20, 'scott@calcite', '2019-07-28 12:12:12',  '2019-07-28 12:12:12');
INSERT INTO EMP VALUES (7839, 'KING', 'PRESIDENT', 22, NULL, '1981-11-17', 
5000, NULL, 10, 'king@calcite', '2019-06-08 15:15:15',  '2019-06-08 15:15:15');
INSERT INTO EMP VALUES (7844, 'TURNER', 'SALESMAN', 54, 7698, '1981-09-08', 
1500, 0, 30, 'turner@calcite', '2017-08-17 22:01:37',  '2017-08-17 22:01:37');
INSERT INTO EMP VALUES (7876, 'ADAMS', 'CLERK', 35, 7788, '1987-05-23', 1100, 
NULL, 20, 'adams@calcite', NULL,  '2017-08-18 23:11:06');
INSERT INTO EMP VALUES (7900, 'JAMES', 'CLERK', 40, 7698, '1981-12-03', 950, 
NULL, 30, 'james@calcite', '2020-01-02 12:19:00',  '2020-01-02 12:19:00');
INSERT INTO EMP VALUES (7902, 'FORD', 'ANALYST', 28, 7566, '1981-12-03', 3000, 
NULL, 20, 'ford@calcite', '2019-05-29 00:00:00',  '2019-05-29 00:00:00');
INSERT INTO EMP VALUES (7934, 'MILLER', 'CLERK', 32, 7782, '1982-01-23', 1300, 
NULL, 10, NULL, '2016-09-02 23:15:01',  '2016-09-02 23:15:01');
{code}


{code:sql}
select
  count(*) filter (where empno in (select deptno from emp))
from emp;
{code}

maybe equal to:

{code:sql}
 SELECT
  SUM(CASE WHEN empno IN (SELECT deptno FROM emp) THEN 1 ELSE 0 END) AS 
count_matching
FROM emp;
{code}



{noformat}
LogicalAggregate(group=[{}], EXPR$0=[COUNT() FILTER $0]), id = 7123
  LogicalProject($f0=[AND(IS NOT NULL($13), IS NOT NULL($0))]), id = 7122
    LogicalJoin(condition=[=($0, $12)], joinType=[left]), id = 7111
      LogicalTableScan(table=[[db1, emp]]), id = 7050
      LogicalProject(DEPTNO=[$0], i=[true]), id = 7120
        LogicalAggregate(group=[{0}]), id = 7117
          LogicalProject(DEPTNO=[$8]), id = 7107
            LogicalTableScan(table=[[db1, emp]]), id = 7050
{noformat}


  was:
{code:sql}
CREATE TABLE dept(
    deptno INT,    
    dname string,
    loc string
);

CREATE TABLE emp(
    empno INT ,
    ename string,
    job string,
    age INT,
    mgr INT,
    hiredate DATE,
    sal double,    
    comm double,
    deptno INT,
    email string,
    create_datetime TIMESTAMP,  
    upsert_time TIMESTAMP
);

INSERT INTO DEPT VALUES (10, 'ACCOUNTING', 'NEW YORK');
INSERT INTO DEPT VALUES (20, 'RESEARCH', 'DALLAS');
INSERT INTO DEPT VALUES (30, 'SALES', 'CHICAGO');
INSERT INTO DEPT VALUES (40, 'OPERATIONS', 'BOSTON');

INSERT INTO EMP VALUES (7369, 'SMITH', 'CLERK', 30, 7902, '1980-12-17', 800, 
NULL, 20, 'smith@calcite', '2020-01-01 18:35:40',  '2020-01-01 18:35:40');
INSERT INTO EMP VALUES (7499, 'ALLEN', 'SALESMAN', 24, 7698, '1981-02-20', 
1600, 300, 30, 'allen@calcite', '2018-04-09 09:00:00',  '2018-04-09 09:00:00');
INSERT INTO EMP VALUES (7521, 'WARD', 'SALESMAN', 41, 7698, '1981-02-22', 1250, 
500, 30, 'ward@calcite', '2019-11-16 10:26:40',  '2019-11-16 10:26:40');
INSERT INTO EMP VALUES (7566, 'JONES', 'MANAGER', 28, 7839, '1981-02-04', 2975, 
NULL, 20, 'jones@calcite', '2015-03-09 22:16:30',  '2015-03-09 22:16:30');
INSERT INTO EMP VALUES (7654, 'MARTIN', 'SALESMAN', 27, 7698, '1981-09-28', 
1250, 1400, 30, 'martin@calcite', '2018-09-02 12:12:56',  '2018-09-02 
12:12:56');
INSERT INTO EMP VALUES (7698, 'BLAKE', 'MANAGER', 38, 7839, '1981-01-05', 2850, 
NULL, 30, 'blake@calcite', '2018-06-01 14:45:00',  '2018-06-01 14:45:00');
INSERT INTO EMP VALUES (7782, 'CLARK', 'MANAGER', 32, 7839, '1981-06-09', 2450, 
NULL, 10, NULL, '2019-09-30 02:14:56',  '2019-09-30 02:14:56');
INSERT INTO EMP VALUES (7788, 'SCOTT', 'ANALYST', 45, 7566, '1987-04-19', 3000, 
NULL, 20, 'scott@calcite', '2019-07-28 12:12:12',  '2019-07-28 12:12:12');
INSERT INTO EMP VALUES (7839, 'KING', 'PRESIDENT', 22, NULL, '1981-11-17', 
5000, NULL, 10, 'king@calcite', '2019-06-08 15:15:15',  '2019-06-08 15:15:15');
INSERT INTO EMP VALUES (7844, 'TURNER', 'SALESMAN', 54, 7698, '1981-09-08', 
1500, 0, 30, 'turner@calcite', '2017-08-17 22:01:37',  '2017-08-17 22:01:37');
INSERT INTO EMP VALUES (7876, 'ADAMS', 'CLERK', 35, 7788, '1987-05-23', 1100, 
NULL, 20, 'adams@calcite', NULL,  '2017-08-18 23:11:06');
INSERT INTO EMP VALUES (7900, 'JAMES', 'CLERK', 40, 7698, '1981-12-03', 950, 
NULL, 30, 'james@calcite', '2020-01-02 12:19:00',  '2020-01-02 12:19:00');
INSERT INTO EMP VALUES (7902, 'FORD', 'ANALYST', 28, 7566, '1981-12-03', 3000, 
NULL, 20, 'ford@calcite', '2019-05-29 00:00:00',  '2019-05-29 00:00:00');
INSERT INTO EMP VALUES (7934, 'MILLER', 'CLERK', 32, 7782, '1982-01-23', 1300, 
NULL, 10, NULL, '2016-09-02 23:15:01',  '2016-09-02 23:15:01');

select
  count(*) filter (where empno in (select deptno from emp))
from emp;
{code}



{noformat}
LogicalAggregate(group=[{}], EXPR$0=[COUNT() FILTER $0]), id = 7123
  LogicalProject($f0=[AND(IS NOT NULL($13), IS NOT NULL($0))]), id = 7122
    LogicalJoin(condition=[=($0, $12)], joinType=[left]), id = 7111
      LogicalTableScan(table=[[db1, emp]]), id = 7050
      LogicalProject(DEPTNO=[$0], i=[true]), id = 7120
        LogicalAggregate(group=[{0}]), id = 7117
          LogicalProject(DEPTNO=[$8]), id = 7107
            LogicalTableScan(table=[[db1, emp]]), id = 7050
{noformat}



> Calcite Planner: Impala needs to implement a converter that transforms 
> COUNT() FILTER into a form that Impala can handle.
> -------------------------------------------------------------------------------------------------------------------------
>
>                 Key: IMPALA-14200
>                 URL: https://issues.apache.org/jira/browse/IMPALA-14200
>             Project: IMPALA
>          Issue Type: Sub-task
>            Reporter: weihua zhang
>            Priority: Major
>
> {code:sql}
> CREATE TABLE dept(
>     deptno INT,    
>     dname string,
>     loc string
> );
> CREATE TABLE emp(
>     empno INT ,
>     ename string,
>     job string,
>     age INT,
>     mgr INT,
>     hiredate DATE,
>     sal double,    
>     comm double,
>     deptno INT,
>     email string,
>     create_datetime TIMESTAMP,  
>     upsert_time TIMESTAMP
> );
> INSERT INTO DEPT VALUES (10, 'ACCOUNTING', 'NEW YORK');
> INSERT INTO DEPT VALUES (20, 'RESEARCH', 'DALLAS');
> INSERT INTO DEPT VALUES (30, 'SALES', 'CHICAGO');
> INSERT INTO DEPT VALUES (40, 'OPERATIONS', 'BOSTON');
> INSERT INTO EMP VALUES (7369, 'SMITH', 'CLERK', 30, 7902, '1980-12-17', 800, 
> NULL, 20, 'smith@calcite', '2020-01-01 18:35:40',  '2020-01-01 18:35:40');
> INSERT INTO EMP VALUES (7499, 'ALLEN', 'SALESMAN', 24, 7698, '1981-02-20', 
> 1600, 300, 30, 'allen@calcite', '2018-04-09 09:00:00',  '2018-04-09 
> 09:00:00');
> INSERT INTO EMP VALUES (7521, 'WARD', 'SALESMAN', 41, 7698, '1981-02-22', 
> 1250, 500, 30, 'ward@calcite', '2019-11-16 10:26:40',  '2019-11-16 10:26:40');
> INSERT INTO EMP VALUES (7566, 'JONES', 'MANAGER', 28, 7839, '1981-02-04', 
> 2975, NULL, 20, 'jones@calcite', '2015-03-09 22:16:30',  '2015-03-09 
> 22:16:30');
> INSERT INTO EMP VALUES (7654, 'MARTIN', 'SALESMAN', 27, 7698, '1981-09-28', 
> 1250, 1400, 30, 'martin@calcite', '2018-09-02 12:12:56',  '2018-09-02 
> 12:12:56');
> INSERT INTO EMP VALUES (7698, 'BLAKE', 'MANAGER', 38, 7839, '1981-01-05', 
> 2850, NULL, 30, 'blake@calcite', '2018-06-01 14:45:00',  '2018-06-01 
> 14:45:00');
> INSERT INTO EMP VALUES (7782, 'CLARK', 'MANAGER', 32, 7839, '1981-06-09', 
> 2450, NULL, 10, NULL, '2019-09-30 02:14:56',  '2019-09-30 02:14:56');
> INSERT INTO EMP VALUES (7788, 'SCOTT', 'ANALYST', 45, 7566, '1987-04-19', 
> 3000, NULL, 20, 'scott@calcite', '2019-07-28 12:12:12',  '2019-07-28 
> 12:12:12');
> INSERT INTO EMP VALUES (7839, 'KING', 'PRESIDENT', 22, NULL, '1981-11-17', 
> 5000, NULL, 10, 'king@calcite', '2019-06-08 15:15:15',  '2019-06-08 
> 15:15:15');
> INSERT INTO EMP VALUES (7844, 'TURNER', 'SALESMAN', 54, 7698, '1981-09-08', 
> 1500, 0, 30, 'turner@calcite', '2017-08-17 22:01:37',  '2017-08-17 22:01:37');
> INSERT INTO EMP VALUES (7876, 'ADAMS', 'CLERK', 35, 7788, '1987-05-23', 1100, 
> NULL, 20, 'adams@calcite', NULL,  '2017-08-18 23:11:06');
> INSERT INTO EMP VALUES (7900, 'JAMES', 'CLERK', 40, 7698, '1981-12-03', 950, 
> NULL, 30, 'james@calcite', '2020-01-02 12:19:00',  '2020-01-02 12:19:00');
> INSERT INTO EMP VALUES (7902, 'FORD', 'ANALYST', 28, 7566, '1981-12-03', 
> 3000, NULL, 20, 'ford@calcite', '2019-05-29 00:00:00',  '2019-05-29 
> 00:00:00');
> INSERT INTO EMP VALUES (7934, 'MILLER', 'CLERK', 32, 7782, '1982-01-23', 
> 1300, NULL, 10, NULL, '2016-09-02 23:15:01',  '2016-09-02 23:15:01');
> {code}
> {code:sql}
> select
>   count(*) filter (where empno in (select deptno from emp))
> from emp;
> {code}
> maybe equal to:
> {code:sql}
>  SELECT
>   SUM(CASE WHEN empno IN (SELECT deptno FROM emp) THEN 1 ELSE 0 END) AS 
> count_matching
> FROM emp;
> {code}
> {noformat}
> LogicalAggregate(group=[{}], EXPR$0=[COUNT() FILTER $0]), id = 7123
>   LogicalProject($f0=[AND(IS NOT NULL($13), IS NOT NULL($0))]), id = 7122
>     LogicalJoin(condition=[=($0, $12)], joinType=[left]), id = 7111
>       LogicalTableScan(table=[[db1, emp]]), id = 7050
>       LogicalProject(DEPTNO=[$0], i=[true]), id = 7120
>         LogicalAggregate(group=[{0}]), id = 7117
>           LogicalProject(DEPTNO=[$8]), id = 7107
>             LogicalTableScan(table=[[db1, emp]]), id = 7050
> {noformat}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-all-unsubscr...@impala.apache.org
For additional commands, e-mail: issues-all-h...@impala.apache.org

Reply via email to