[ https://issues.apache.org/jira/browse/DRILL-5581?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Khurram Faraaz updated DRILL-5581: ---------------------------------- Description: A query that uses case statement, returns wrong results. {noformat} Apache Drill 1.11.0-SNAPSHOT, commit id: 874bf629 [test@centos-101 ~]# cat order_sample.csv 202634342,20000101,20160301 apache drill 1.11.0-SNAPSHOT "this isn't your grandfather's sql" 0: jdbc:drill:schema=dfs.tmp> ALTER SESSION SET `store.format`='csv'; +-------+------------------------+ | ok | summary | +-------+------------------------+ | true | store.format updated. | +-------+------------------------+ 1 row selected (0.245 seconds) 0: jdbc:drill:schema=dfs.tmp> CREATE VIEW `vw_order_sample_csv` as . . . . . . . . . . . . . . > SELECT . . . . . . . . . . . . . . > `columns`[0] AS `ND`, . . . . . . . . . . . . . . > CAST(`columns`[1] AS BIGINT) AS `col1`, . . . . . . . . . . . . . . > CAST(`columns`[2] AS BIGINT) AS `col2` . . . . . . . . . . . . . . > FROM `order_sample.csv`; +-------+----------------------------------------------------------------------+ | ok | summary | +-------+----------------------------------------------------------------------+ | true | View 'vw_order_sample_csv' created successfully in 'dfs.tmp' schema | +-------+----------------------------------------------------------------------+ 1 row selected (0.253 seconds) 0: jdbc:drill:schema=dfs.tmp> select . . . . . . . . . . . . . . > case . . . . . . . . . . . . . . > when col1 > col2 then col1 . . . . . . . . . . . . . . > else col2 . . . . . . . . . . . . . . > end as temp_col, . . . . . . . . . . . . . . > case . . . . . . . . . . . . . . > when col1 = 20000101 and (20170302 - col2) > 10000 then 'D' . . . . . . . . . . . . . . > when col2 = 20000101 then 'P' . . . . . . . . . . . . . . > when col1 - col2 > 10000 then '0' . . . . . . . . . . . . . . > else 'A' . . . . . . . . . . . . . . > end as status . . . . . . . . . . . . . . > from `vw_order_sample_csv`; +-----------+---------+ | temp_col | status | +-----------+---------+ | 20160301 | A | +-----------+---------+ 1 row selected (0.318 seconds) 0: jdbc:drill:schema=dfs.tmp> explain plan for . . . . . . . . . . . . . . > select . . . . . . . . . . . . . . > case . . . . . . . . . . . . . . > when col1 > col2 then col1 . . . . . . . . . . . . . . > else col2 . . . . . . . . . . . . . . > end as temp_col, . . . . . . . . . . . . . . > case . . . . . . . . . . . . . . > when col1 = 20000101 and (20170302 - col2) > 10000 then 'D' . . . . . . . . . . . . . . > when col2 = 20000101 then 'P' . . . . . . . . . . . . . . > when col1 - col2 > 10000 then '0' . . . . . . . . . . . . . . > else 'A' . . . . . . . . . . . . . . > end as status . . . . . . . . . . . . . . > from `vw_order_sample_csv`; +------+------+ | text | json | +------+------+ | 00-00 Screen 00-01 Project(temp_col=[CASE(>(CAST(ITEM($0, 1)):BIGINT, CAST(ITEM($0, 2)):BIGINT), CAST(ITEM($0, 1)):BIGINT, CAST(ITEM($0, 2)):BIGINT)], status=[CASE(AND(=(CAST(ITEM($0, 1)):BIGINT, 20000101), >(-(20170302, CAST(ITEM($0, 2)):BIGINT), 10000)), 'D', =(CAST(ITEM($0, 2)):BIGINT, 20000101), 'P', >(-(CAST(ITEM($0, 1)):BIGINT, CAST(ITEM($0, 2)):BIGINT), 10000), '0', 'A')]) 00-02 Scan(groupscan=[EasyGroupScan [selectionRoot=maprfs:/tmp/order_sample.csv, numFiles=1, columns=[`columns`[1], `columns`[2]], files=[maprfs:///tmp/order_sample.csv]]]) // Details of Java compiler from sys.options 0: jdbc:drill:schema=dfs.tmp> select name, status from sys.options where name like '%java_compiler%'; +----------------------------------------+----------+ | name | status | +----------------------------------------+----------+ | exec.java.compiler.exp_in_method_size | DEFAULT | | exec.java_compiler | DEFAULT | | exec.java_compiler_debug | DEFAULT | | exec.java_compiler_janino_maxsize | DEFAULT | +----------------------------------------+----------+ 4 rows selected (0.21 seconds) {noformat} Results from Postgres 9.3 for the same query, note the difference in results {noformat} postgres=# create table order_sample(c1 varchar(50), c2 bigint, c3 bigint); CREATE TABLE postgres=# insert into order_sample values('202634342',20000101,20160301); INSERT 0 1 postgres=# select * from order_sample; c1 | c2 | c3 -----------+----------+---------- 202634342 | 20000101 | 20160301 (1 row) postgres=# create view vw_order_sample_csv as select c1 as ND, CAST(c2 AS BIGINT) AS col1, CAST(c3 AS BIGINT) AS col2 FROM order_sample; CREATE VIEW postgres=# select postgres-# case postgres-# when col1 > col2 then col1 postgres-# else col2 postgres-# end as temp_col, postgres-# case postgres-# when col1 = 20000101 and (20170302 - col2) > 10000 then 'D' postgres-# when col2 = 20000101 then 'P' postgres-# when col1 - col2 > 10000 then '0' postgres-# else 'A' postgres-# end as status postgres-# from vw_order_sample_csv; temp_col | status ----------+-------- 20160301 | D (1 row) {noformat} was: A query that uses case statement, returns wrong results. {noformat} Apache Drill 1.11.0-SNAPSHOT, commit id: 874bf629 [test@centos-101 ~]# cat order_sample.csv 202634342,20000101,20160301 apache drill 1.11.0-SNAPSHOT "this isn't your grandfather's sql" 0: jdbc:drill:schema=dfs.tmp> ALTER SESSION SET `store.format`='csv'; +-------+------------------------+ | ok | summary | +-------+------------------------+ | true | store.format updated. | +-------+------------------------+ 1 row selected (0.245 seconds) 0: jdbc:drill:schema=dfs.tmp> CREATE VIEW `vw_order_sample_csv` as . . . . . . . . . . . . . . > SELECT . . . . . . . . . . . . . . > `columns`[0] AS `ND`, . . . . . . . . . . . . . . > CAST(`columns`[1] AS BIGINT) AS `col1`, . . . . . . . . . . . . . . > CAST(`columns`[2] AS BIGINT) AS `col2` . . . . . . . . . . . . . . > FROM `order_sample.csv`; +-------+----------------------------------------------------------------------+ | ok | summary | +-------+----------------------------------------------------------------------+ | true | View 'vw_order_sample_csv' created successfully in 'dfs.tmp' schema | +-------+----------------------------------------------------------------------+ 1 row selected (0.253 seconds) 0: jdbc:drill:schema=dfs.tmp> select . . . . . . . . . . . . . . > case . . . . . . . . . . . . . . > when col1 > col2 then col1 . . . . . . . . . . . . . . > else col2 . . . . . . . . . . . . . . > end as temp_col, . . . . . . . . . . . . . . > case . . . . . . . . . . . . . . > when col1 = 20000101 and (20170302 - col2) > 10000 then 'D' . . . . . . . . . . . . . . > when col2 = 20000101 then 'P' . . . . . . . . . . . . . . > when col1 - col2 > 10000 then '0' . . . . . . . . . . . . . . > else 'A' . . . . . . . . . . . . . . > end as status . . . . . . . . . . . . . . > from `vw_order_sample_csv`; +-----------+---------+ | temp_col | status | +-----------+---------+ | 20160301 | A | +-----------+---------+ 1 row selected (0.318 seconds) 0: jdbc:drill:schema=dfs.tmp> explain plan for . . . . . . . . . . . . . . > select . . . . . . . . . . . . . . > case . . . . . . . . . . . . . . > when col1 > col2 then col1 . . . . . . . . . . . . . . > else col2 . . . . . . . . . . . . . . > end as temp_col, . . . . . . . . . . . . . . > case . . . . . . . . . . . . . . > when col1 = 20000101 and (20170302 - col2) > 10000 then 'D' . . . . . . . . . . . . . . > when col2 = 20000101 then 'P' . . . . . . . . . . . . . . > when col1 - col2 > 10000 then '0' . . . . . . . . . . . . . . > else 'A' . . . . . . . . . . . . . . > end as status . . . . . . . . . . . . . . > from `vw_order_sample_csv`; +------+------+ | text | json | +------+------+ | 00-00 Screen 00-01 Project(temp_col=[CASE(>(CAST(ITEM($0, 1)):BIGINT, CAST(ITEM($0, 2)):BIGINT), CAST(ITEM($0, 1)):BIGINT, CAST(ITEM($0, 2)):BIGINT)], status=[CASE(AND(=(CAST(ITEM($0, 1)):BIGINT, 20000101), >(-(20170302, CAST(ITEM($0, 2)):BIGINT), 10000)), 'D', =(CAST(ITEM($0, 2)):BIGINT, 20000101), 'P', >(-(CAST(ITEM($0, 1)):BIGINT, CAST(ITEM($0, 2)):BIGINT), 10000), '0', 'A')]) 00-02 Scan(groupscan=[EasyGroupScan [selectionRoot=maprfs:/tmp/order_sample.csv, numFiles=1, columns=[`columns`[1], `columns`[2]], files=[maprfs:///tmp/order_sample.csv]]]) // Details of Java compiler from sys.options 0: jdbc:drill:schema=dfs.tmp> select name, status from sys.options where name like '%java_compiler%'; +----------------------------------------+----------+ | name | status | +----------------------------------------+----------+ | exec.java.compiler.exp_in_method_size | DEFAULT | | exec.java_compiler | DEFAULT | | exec.java_compiler_debug | DEFAULT | | exec.java_compiler_janino_maxsize | DEFAULT | +----------------------------------------+----------+ 4 rows selected (0.21 seconds) {noformat} > Query with CASE statement returns wrong results > ----------------------------------------------- > > Key: DRILL-5581 > URL: https://issues.apache.org/jira/browse/DRILL-5581 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Flow > Affects Versions: 1.11.0 > Reporter: Khurram Faraaz > Assignee: Jinfeng Ni > > A query that uses case statement, returns wrong results. > {noformat} > Apache Drill 1.11.0-SNAPSHOT, commit id: 874bf629 > [test@centos-101 ~]# cat order_sample.csv > 202634342,20000101,20160301 > apache drill 1.11.0-SNAPSHOT > "this isn't your grandfather's sql" > 0: jdbc:drill:schema=dfs.tmp> ALTER SESSION SET `store.format`='csv'; > +-------+------------------------+ > | ok | summary | > +-------+------------------------+ > | true | store.format updated. | > +-------+------------------------+ > 1 row selected (0.245 seconds) > 0: jdbc:drill:schema=dfs.tmp> CREATE VIEW `vw_order_sample_csv` as > . . . . . . . . . . . . . . > SELECT > . . . . . . . . . . . . . . > `columns`[0] AS `ND`, > . . . . . . . . . . . . . . > CAST(`columns`[1] AS BIGINT) AS `col1`, > . . . . . . . . . . . . . . > CAST(`columns`[2] AS BIGINT) AS `col2` > . . . . . . . . . . . . . . > FROM `order_sample.csv`; > +-------+----------------------------------------------------------------------+ > | ok | summary > | > +-------+----------------------------------------------------------------------+ > | true | View 'vw_order_sample_csv' created successfully in 'dfs.tmp' schema > | > +-------+----------------------------------------------------------------------+ > 1 row selected (0.253 seconds) > 0: jdbc:drill:schema=dfs.tmp> select > . . . . . . . . . . . . . . > case > . . . . . . . . . . . . . . > when col1 > col2 then col1 > . . . . . . . . . . . . . . > else col2 > . . . . . . . . . . . . . . > end as temp_col, > . . . . . . . . . . . . . . > case > . . . . . . . . . . . . . . > when col1 = 20000101 and (20170302 - col2) > > 10000 then 'D' > . . . . . . . . . . . . . . > when col2 = 20000101 then 'P' > . . . . . . . . . . . . . . > when col1 - col2 > 10000 then '0' > . . . . . . . . . . . . . . > else 'A' > . . . . . . . . . . . . . . > end as status > . . . . . . . . . . . . . . > from `vw_order_sample_csv`; > +-----------+---------+ > | temp_col | status | > +-----------+---------+ > | 20160301 | A | > +-----------+---------+ > 1 row selected (0.318 seconds) > 0: jdbc:drill:schema=dfs.tmp> explain plan for > . . . . . . . . . . . . . . > select > . . . . . . . . . . . . . . > case > . . . . . . . . . . . . . . > when col1 > col2 then col1 > . . . . . . . . . . . . . . > else col2 > . . . . . . . . . . . . . . > end as temp_col, > . . . . . . . . . . . . . . > case > . . . . . . . . . . . . . . > when col1 = 20000101 and (20170302 - col2) > > 10000 then 'D' > . . . . . . . . . . . . . . > when col2 = 20000101 then 'P' > . . . . . . . . . . . . . . > when col1 - col2 > 10000 then '0' > . . . . . . . . . . . . . . > else 'A' > . . . . . . . . . . . . . . > end as status > . . . . . . . . . . . . . . > from `vw_order_sample_csv`; > +------+------+ > | text | json | > +------+------+ > | 00-00 Screen > 00-01 Project(temp_col=[CASE(>(CAST(ITEM($0, 1)):BIGINT, CAST(ITEM($0, > 2)):BIGINT), CAST(ITEM($0, 1)):BIGINT, CAST(ITEM($0, 2)):BIGINT)], > status=[CASE(AND(=(CAST(ITEM($0, 1)):BIGINT, 20000101), >(-(20170302, > CAST(ITEM($0, 2)):BIGINT), 10000)), 'D', =(CAST(ITEM($0, 2)):BIGINT, > 20000101), 'P', >(-(CAST(ITEM($0, 1)):BIGINT, CAST(ITEM($0, 2)):BIGINT), > 10000), '0', 'A')]) > 00-02 Scan(groupscan=[EasyGroupScan > [selectionRoot=maprfs:/tmp/order_sample.csv, numFiles=1, > columns=[`columns`[1], `columns`[2]], > files=[maprfs:///tmp/order_sample.csv]]]) > // Details of Java compiler from sys.options > 0: jdbc:drill:schema=dfs.tmp> select name, status from sys.options where name > like '%java_compiler%'; > +----------------------------------------+----------+ > | name | status | > +----------------------------------------+----------+ > | exec.java.compiler.exp_in_method_size | DEFAULT | > | exec.java_compiler | DEFAULT | > | exec.java_compiler_debug | DEFAULT | > | exec.java_compiler_janino_maxsize | DEFAULT | > +----------------------------------------+----------+ > 4 rows selected (0.21 seconds) > {noformat} > Results from Postgres 9.3 for the same query, note the difference in results > {noformat} > postgres=# create table order_sample(c1 varchar(50), c2 bigint, c3 bigint); > CREATE TABLE > postgres=# insert into order_sample values('202634342',20000101,20160301); > INSERT 0 1 > postgres=# select * from order_sample; > c1 | c2 | c3 > -----------+----------+---------- > 202634342 | 20000101 | 20160301 > (1 row) > postgres=# create view vw_order_sample_csv as > select > c1 as ND, > CAST(c2 AS BIGINT) AS col1, > CAST(c3 AS BIGINT) AS col2 > FROM order_sample; > CREATE VIEW > postgres=# select > postgres-# case > postgres-# when col1 > col2 then col1 > postgres-# else col2 > postgres-# end as temp_col, > postgres-# case > postgres-# when col1 = 20000101 and (20170302 - col2) > 10000 then 'D' > postgres-# when col2 = 20000101 then 'P' > postgres-# when col1 - col2 > 10000 then '0' > postgres-# else 'A' > postgres-# end as status > postgres-# from vw_order_sample_csv; > temp_col | status > ----------+-------- > 20160301 | D > (1 row) > {noformat} -- This message was sent by Atlassian JIRA (v6.4.14#64029)