[
https://issues.apache.org/jira/browse/DRILL-5581?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Khurram Faraaz updated DRILL-5581:
----------------------------------
Description:
A query that uses case statement, returns wrong results.
{noformat}
Apache Drill 1.11.0-SNAPSHOT, commit id: 874bf629
[test@centos-101 ~]# cat order_sample.csv
202634342,20000101,20160301
apache drill 1.11.0-SNAPSHOT
"this isn't your grandfather's sql"
0: jdbc:drill:schema=dfs.tmp> ALTER SESSION SET `store.format`='csv';
+-------+------------------------+
| ok | summary |
+-------+------------------------+
| true | store.format updated. |
+-------+------------------------+
1 row selected (0.245 seconds)
0: jdbc:drill:schema=dfs.tmp> CREATE VIEW `vw_order_sample_csv` as
. . . . . . . . . . . . . . > SELECT
. . . . . . . . . . . . . . > `columns`[0] AS `ND`,
. . . . . . . . . . . . . . > CAST(`columns`[1] AS BIGINT) AS `col1`,
. . . . . . . . . . . . . . > CAST(`columns`[2] AS BIGINT) AS `col2`
. . . . . . . . . . . . . . > FROM `order_sample.csv`;
+-------+----------------------------------------------------------------------+
| ok | summary |
+-------+----------------------------------------------------------------------+
| true | View 'vw_order_sample_csv' created successfully in 'dfs.tmp' schema |
+-------+----------------------------------------------------------------------+
1 row selected (0.253 seconds)
0: jdbc:drill:schema=dfs.tmp> select
. . . . . . . . . . . . . . > case
. . . . . . . . . . . . . . > when col1 > col2 then col1
. . . . . . . . . . . . . . > else col2
. . . . . . . . . . . . . . > end as temp_col,
. . . . . . . . . . . . . . > case
. . . . . . . . . . . . . . > when col1 = 20000101 and (20170302 - col2) >
10000 then 'D'
. . . . . . . . . . . . . . > when col2 = 20000101 then 'P'
. . . . . . . . . . . . . . > when col1 - col2 > 10000 then '0'
. . . . . . . . . . . . . . > else 'A'
. . . . . . . . . . . . . . > end as status
. . . . . . . . . . . . . . > from `vw_order_sample_csv`;
+-----------+---------+
| temp_col | status |
+-----------+---------+
| 20160301 | A |
+-----------+---------+
1 row selected (0.318 seconds)
0: jdbc:drill:schema=dfs.tmp> explain plan for
. . . . . . . . . . . . . . > select
. . . . . . . . . . . . . . > case
. . . . . . . . . . . . . . > when col1 > col2 then col1
. . . . . . . . . . . . . . > else col2
. . . . . . . . . . . . . . > end as temp_col,
. . . . . . . . . . . . . . > case
. . . . . . . . . . . . . . > when col1 = 20000101 and (20170302 - col2) >
10000 then 'D'
. . . . . . . . . . . . . . > when col2 = 20000101 then 'P'
. . . . . . . . . . . . . . > when col1 - col2 > 10000 then '0'
. . . . . . . . . . . . . . > else 'A'
. . . . . . . . . . . . . . > end as status
. . . . . . . . . . . . . . > from `vw_order_sample_csv`;
+------+------+
| text | json |
+------+------+
| 00-00 Screen
00-01 Project(temp_col=[CASE(>(CAST(ITEM($0, 1)):BIGINT, CAST(ITEM($0,
2)):BIGINT), CAST(ITEM($0, 1)):BIGINT, CAST(ITEM($0, 2)):BIGINT)],
status=[CASE(AND(=(CAST(ITEM($0, 1)):BIGINT, 20000101), >(-(20170302,
CAST(ITEM($0, 2)):BIGINT), 10000)), 'D', =(CAST(ITEM($0, 2)):BIGINT, 20000101),
'P', >(-(CAST(ITEM($0, 1)):BIGINT, CAST(ITEM($0, 2)):BIGINT), 10000), '0',
'A')])
00-02 Scan(groupscan=[EasyGroupScan
[selectionRoot=maprfs:/tmp/order_sample.csv, numFiles=1, columns=[`columns`[1],
`columns`[2]], files=[maprfs:///tmp/order_sample.csv]]])
// Details of Java compiler from sys.options
0: jdbc:drill:schema=dfs.tmp> select name, status from sys.options where name
like '%java_compiler%';
+----------------------------------------+----------+
| name | status |
+----------------------------------------+----------+
| exec.java.compiler.exp_in_method_size | DEFAULT |
| exec.java_compiler | DEFAULT |
| exec.java_compiler_debug | DEFAULT |
| exec.java_compiler_janino_maxsize | DEFAULT |
+----------------------------------------+----------+
4 rows selected (0.21 seconds)
{noformat}
Results from Postgres 9.3 for the same query, note the difference in results
{noformat}
postgres=# create table order_sample(c1 varchar(50), c2 bigint, c3 bigint);
CREATE TABLE
postgres=# insert into order_sample values('202634342',20000101,20160301);
INSERT 0 1
postgres=# select * from order_sample;
c1 | c2 | c3
-----------+----------+----------
202634342 | 20000101 | 20160301
(1 row)
postgres=# create view vw_order_sample_csv as
select
c1 as ND,
CAST(c2 AS BIGINT) AS col1,
CAST(c3 AS BIGINT) AS col2
FROM order_sample;
CREATE VIEW
postgres=# select
postgres-# case
postgres-# when col1 > col2 then col1
postgres-# else col2
postgres-# end as temp_col,
postgres-# case
postgres-# when col1 = 20000101 and (20170302 - col2) > 10000 then 'D'
postgres-# when col2 = 20000101 then 'P'
postgres-# when col1 - col2 > 10000 then '0'
postgres-# else 'A'
postgres-# end as status
postgres-# from vw_order_sample_csv;
temp_col | status
----------+--------
20160301 | D
(1 row)
{noformat}
was:
A query that uses case statement, returns wrong results.
{noformat}
Apache Drill 1.11.0-SNAPSHOT, commit id: 874bf629
[test@centos-101 ~]# cat order_sample.csv
202634342,20000101,20160301
apache drill 1.11.0-SNAPSHOT
"this isn't your grandfather's sql"
0: jdbc:drill:schema=dfs.tmp> ALTER SESSION SET `store.format`='csv';
+-------+------------------------+
| ok | summary |
+-------+------------------------+
| true | store.format updated. |
+-------+------------------------+
1 row selected (0.245 seconds)
0: jdbc:drill:schema=dfs.tmp> CREATE VIEW `vw_order_sample_csv` as
. . . . . . . . . . . . . . > SELECT
. . . . . . . . . . . . . . > `columns`[0] AS `ND`,
. . . . . . . . . . . . . . > CAST(`columns`[1] AS BIGINT) AS `col1`,
. . . . . . . . . . . . . . > CAST(`columns`[2] AS BIGINT) AS `col2`
. . . . . . . . . . . . . . > FROM `order_sample.csv`;
+-------+----------------------------------------------------------------------+
| ok | summary |
+-------+----------------------------------------------------------------------+
| true | View 'vw_order_sample_csv' created successfully in 'dfs.tmp' schema |
+-------+----------------------------------------------------------------------+
1 row selected (0.253 seconds)
0: jdbc:drill:schema=dfs.tmp> select
. . . . . . . . . . . . . . > case
. . . . . . . . . . . . . . > when col1 > col2 then col1
. . . . . . . . . . . . . . > else col2
. . . . . . . . . . . . . . > end as temp_col,
. . . . . . . . . . . . . . > case
. . . . . . . . . . . . . . > when col1 = 20000101 and (20170302 - col2) >
10000 then 'D'
. . . . . . . . . . . . . . > when col2 = 20000101 then 'P'
. . . . . . . . . . . . . . > when col1 - col2 > 10000 then '0'
. . . . . . . . . . . . . . > else 'A'
. . . . . . . . . . . . . . > end as status
. . . . . . . . . . . . . . > from `vw_order_sample_csv`;
+-----------+---------+
| temp_col | status |
+-----------+---------+
| 20160301 | A |
+-----------+---------+
1 row selected (0.318 seconds)
0: jdbc:drill:schema=dfs.tmp> explain plan for
. . . . . . . . . . . . . . > select
. . . . . . . . . . . . . . > case
. . . . . . . . . . . . . . > when col1 > col2 then col1
. . . . . . . . . . . . . . > else col2
. . . . . . . . . . . . . . > end as temp_col,
. . . . . . . . . . . . . . > case
. . . . . . . . . . . . . . > when col1 = 20000101 and (20170302 - col2) >
10000 then 'D'
. . . . . . . . . . . . . . > when col2 = 20000101 then 'P'
. . . . . . . . . . . . . . > when col1 - col2 > 10000 then '0'
. . . . . . . . . . . . . . > else 'A'
. . . . . . . . . . . . . . > end as status
. . . . . . . . . . . . . . > from `vw_order_sample_csv`;
+------+------+
| text | json |
+------+------+
| 00-00 Screen
00-01 Project(temp_col=[CASE(>(CAST(ITEM($0, 1)):BIGINT, CAST(ITEM($0,
2)):BIGINT), CAST(ITEM($0, 1)):BIGINT, CAST(ITEM($0, 2)):BIGINT)],
status=[CASE(AND(=(CAST(ITEM($0, 1)):BIGINT, 20000101), >(-(20170302,
CAST(ITEM($0, 2)):BIGINT), 10000)), 'D', =(CAST(ITEM($0, 2)):BIGINT, 20000101),
'P', >(-(CAST(ITEM($0, 1)):BIGINT, CAST(ITEM($0, 2)):BIGINT), 10000), '0',
'A')])
00-02 Scan(groupscan=[EasyGroupScan
[selectionRoot=maprfs:/tmp/order_sample.csv, numFiles=1, columns=[`columns`[1],
`columns`[2]], files=[maprfs:///tmp/order_sample.csv]]])
// Details of Java compiler from sys.options
0: jdbc:drill:schema=dfs.tmp> select name, status from sys.options where name
like '%java_compiler%';
+----------------------------------------+----------+
| name | status |
+----------------------------------------+----------+
| exec.java.compiler.exp_in_method_size | DEFAULT |
| exec.java_compiler | DEFAULT |
| exec.java_compiler_debug | DEFAULT |
| exec.java_compiler_janino_maxsize | DEFAULT |
+----------------------------------------+----------+
4 rows selected (0.21 seconds)
{noformat}
> Query with CASE statement returns wrong results
> -----------------------------------------------
>
> Key: DRILL-5581
> URL: https://issues.apache.org/jira/browse/DRILL-5581
> Project: Apache Drill
> Issue Type: Bug
> Components: Execution - Flow
> Affects Versions: 1.11.0
> Reporter: Khurram Faraaz
> Assignee: Jinfeng Ni
>
> A query that uses case statement, returns wrong results.
> {noformat}
> Apache Drill 1.11.0-SNAPSHOT, commit id: 874bf629
> [test@centos-101 ~]# cat order_sample.csv
> 202634342,20000101,20160301
> apache drill 1.11.0-SNAPSHOT
> "this isn't your grandfather's sql"
> 0: jdbc:drill:schema=dfs.tmp> ALTER SESSION SET `store.format`='csv';
> +-------+------------------------+
> | ok | summary |
> +-------+------------------------+
> | true | store.format updated. |
> +-------+------------------------+
> 1 row selected (0.245 seconds)
> 0: jdbc:drill:schema=dfs.tmp> CREATE VIEW `vw_order_sample_csv` as
> . . . . . . . . . . . . . . > SELECT
> . . . . . . . . . . . . . . > `columns`[0] AS `ND`,
> . . . . . . . . . . . . . . > CAST(`columns`[1] AS BIGINT) AS `col1`,
> . . . . . . . . . . . . . . > CAST(`columns`[2] AS BIGINT) AS `col2`
> . . . . . . . . . . . . . . > FROM `order_sample.csv`;
> +-------+----------------------------------------------------------------------+
> | ok | summary
> |
> +-------+----------------------------------------------------------------------+
> | true | View 'vw_order_sample_csv' created successfully in 'dfs.tmp' schema
> |
> +-------+----------------------------------------------------------------------+
> 1 row selected (0.253 seconds)
> 0: jdbc:drill:schema=dfs.tmp> select
> . . . . . . . . . . . . . . > case
> . . . . . . . . . . . . . . > when col1 > col2 then col1
> . . . . . . . . . . . . . . > else col2
> . . . . . . . . . . . . . . > end as temp_col,
> . . . . . . . . . . . . . . > case
> . . . . . . . . . . . . . . > when col1 = 20000101 and (20170302 - col2) >
> 10000 then 'D'
> . . . . . . . . . . . . . . > when col2 = 20000101 then 'P'
> . . . . . . . . . . . . . . > when col1 - col2 > 10000 then '0'
> . . . . . . . . . . . . . . > else 'A'
> . . . . . . . . . . . . . . > end as status
> . . . . . . . . . . . . . . > from `vw_order_sample_csv`;
> +-----------+---------+
> | temp_col | status |
> +-----------+---------+
> | 20160301 | A |
> +-----------+---------+
> 1 row selected (0.318 seconds)
> 0: jdbc:drill:schema=dfs.tmp> explain plan for
> . . . . . . . . . . . . . . > select
> . . . . . . . . . . . . . . > case
> . . . . . . . . . . . . . . > when col1 > col2 then col1
> . . . . . . . . . . . . . . > else col2
> . . . . . . . . . . . . . . > end as temp_col,
> . . . . . . . . . . . . . . > case
> . . . . . . . . . . . . . . > when col1 = 20000101 and (20170302 - col2) >
> 10000 then 'D'
> . . . . . . . . . . . . . . > when col2 = 20000101 then 'P'
> . . . . . . . . . . . . . . > when col1 - col2 > 10000 then '0'
> . . . . . . . . . . . . . . > else 'A'
> . . . . . . . . . . . . . . > end as status
> . . . . . . . . . . . . . . > from `vw_order_sample_csv`;
> +------+------+
> | text | json |
> +------+------+
> | 00-00 Screen
> 00-01 Project(temp_col=[CASE(>(CAST(ITEM($0, 1)):BIGINT, CAST(ITEM($0,
> 2)):BIGINT), CAST(ITEM($0, 1)):BIGINT, CAST(ITEM($0, 2)):BIGINT)],
> status=[CASE(AND(=(CAST(ITEM($0, 1)):BIGINT, 20000101), >(-(20170302,
> CAST(ITEM($0, 2)):BIGINT), 10000)), 'D', =(CAST(ITEM($0, 2)):BIGINT,
> 20000101), 'P', >(-(CAST(ITEM($0, 1)):BIGINT, CAST(ITEM($0, 2)):BIGINT),
> 10000), '0', 'A')])
> 00-02 Scan(groupscan=[EasyGroupScan
> [selectionRoot=maprfs:/tmp/order_sample.csv, numFiles=1,
> columns=[`columns`[1], `columns`[2]],
> files=[maprfs:///tmp/order_sample.csv]]])
> // Details of Java compiler from sys.options
> 0: jdbc:drill:schema=dfs.tmp> select name, status from sys.options where name
> like '%java_compiler%';
> +----------------------------------------+----------+
> | name | status |
> +----------------------------------------+----------+
> | exec.java.compiler.exp_in_method_size | DEFAULT |
> | exec.java_compiler | DEFAULT |
> | exec.java_compiler_debug | DEFAULT |
> | exec.java_compiler_janino_maxsize | DEFAULT |
> +----------------------------------------+----------+
> 4 rows selected (0.21 seconds)
> {noformat}
> Results from Postgres 9.3 for the same query, note the difference in results
> {noformat}
> postgres=# create table order_sample(c1 varchar(50), c2 bigint, c3 bigint);
> CREATE TABLE
> postgres=# insert into order_sample values('202634342',20000101,20160301);
> INSERT 0 1
> postgres=# select * from order_sample;
> c1 | c2 | c3
> -----------+----------+----------
> 202634342 | 20000101 | 20160301
> (1 row)
> postgres=# create view vw_order_sample_csv as
> select
> c1 as ND,
> CAST(c2 AS BIGINT) AS col1,
> CAST(c3 AS BIGINT) AS col2
> FROM order_sample;
> CREATE VIEW
> postgres=# select
> postgres-# case
> postgres-# when col1 > col2 then col1
> postgres-# else col2
> postgres-# end as temp_col,
> postgres-# case
> postgres-# when col1 = 20000101 and (20170302 - col2) > 10000 then 'D'
> postgres-# when col2 = 20000101 then 'P'
> postgres-# when col1 - col2 > 10000 then '0'
> postgres-# else 'A'
> postgres-# end as status
> postgres-# from vw_order_sample_csv;
> temp_col | status
> ----------+--------
> 20160301 | D
> (1 row)
> {noformat}
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)