[ 
https://issues.apache.org/jira/browse/DRILL-5064?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Khurram Faraaz updated DRILL-5064:
----------------------------------
    Description: 
The problem (i.e., a difference in results between Postgres and Drill) exists 
on Drill 1.9.0, git commit ID 4312d65, even without the nullability check 
... OR ( t1.col_intrvl_day IS NULL AND t2.col_intrvl_day IS NULL ). A simple 
equi-join involving interval type columns returns wrong results: Postgres 
returns 12 rows, whereas Drill returns only 5.

{noformat}
postgres=# SELECT t1.col_intrvl_dy , t2.col_intrvl_dy
postgres-# FROM typeall_l t1 INNER JOIN  typeall_r t2
postgres-# ON  t1.col_intrvl_dy= t2.col_intrvl_dy;
      col_intrvl_dy       |      col_intrvl_dy
--------------------------+--------------------------
 9 years 10 mons 9 days   | 9 years 10 mons 9 days
 25 years 11 mons 22 days | 25 years 11 mons 22 days
 23 years 10 mons 25 days | 23 years 10 mons 25 days
 26 years 8 mons 12 days  | 26 years 8 mons 12 days
 13 years 6 mons 31 days  | 13 years 7 mons 1 day
 27 years 11 mons 17 days | 27 years 11 mons 17 days
 27 years 11 mons 17 days | 27 years 11 mons 17 days
 23 years 5 mons 20 days  | 23 years 5 mons 20 days
 14 years 7 mons 12 days  | 14 years 7 mons 12 days
 4 years 10 mons 11 days  | 4 years 10 mons 11 days
 21 years 31 days         | 21 years 1 mon 1 day
 23 years 10 mons 25 days | 23 years 10 mons 25 days
(12 rows)
{noformat}

{noformat}
0: jdbc:drill:schema=dfs.tmp> SELECT t1.col_intrvl_day , t2.col_intrvl_day
. . . . . . . . . . . . . . > FROM typeall_l t1 INNER JOIN  typeall_r t2
. . . . . . . . . . . . . . > ON  t1.col_intrvl_day= t2.col_intrvl_day;
+-----------------+------------------+
| col_intrvl_day  | col_intrvl_day0  |
+-----------------+------------------+
| P9DT33156S      | P9DT33156S       |
| P22DT25501S     | P22DT25501S      |
| P25DT10592S     | P25DT10592S      |
| P17DT30024S     | P17DT30024S      |
| P17DT30024S     | P17DT30024S      |
+-----------------+------------------+
5 rows selected (0.515 seconds)
{noformat}

Parquet schema details of the two files used in the join query:

{noformat}
[root@centos-01 intrvl]# ~/parquet-tools/parquet-schema typeall_l/0_0_0.parquet
message root {
  optional int32 col_int;
  optional binary col_chr (UTF8);
  optional binary col_vrchr1 (UTF8);
  optional binary col_vrchr2 (UTF8);
  optional int32 col_dt (DATE);
  optional int32 col_tim (TIME_MILLIS);
  optional int64 col_tmstmp (TIMESTAMP_MILLIS);
  optional float col_flt;
  optional fixed_len_byte_array(12) col_intrvl_yr (INTERVAL);
  optional fixed_len_byte_array(12) col_intrvl_day (INTERVAL);
  optional boolean col_bln;
}

[root@centos-01 intrvl]# ~/parquet-tools/parquet-schema typeall_r/0_0_0.parquet
message root {
  optional int32 col_int;
  optional binary col_chr (UTF8);
  optional binary col_vrchr1 (UTF8);
  optional binary col_vrchr2 (UTF8);
  optional int32 col_dt (DATE);
  optional int32 col_tim (TIME_MILLIS);
  optional int64 col_tmstmp (TIMESTAMP_MILLIS);
  optional float col_flt;
  optional fixed_len_byte_array(12) col_intrvl_yr (INTERVAL);
  optional fixed_len_byte_array(12) col_intrvl_day (INTERVAL);
  optional boolean col_bln;
}
{noformat}

  was:

The problem exists on Drill 1.9.0 git commit ID: 4312d65 (i.e difference in 
results) even without the nullability check ... OR ( t1.col_intrvl_day IS NULL 
AND t2.col_intrvl_day IS NULL ). A simple equi-join involving interval type 
columns returns wrong results.

{noformat}
postgres=# SELECT t1.col_intrvl_dy , t2.col_intrvl_dy
postgres-# FROM typeall_l t1 INNER JOIN  typeall_r t2
postgres-# ON  t1.col_intrvl_dy= t2.col_intrvl_dy;
      col_intrvl_dy       |      col_intrvl_dy
--------------------------+--------------------------
 9 years 10 mons 9 days   | 9 years 10 mons 9 days
 25 years 11 mons 22 days | 25 years 11 mons 22 days
 23 years 10 mons 25 days | 23 years 10 mons 25 days
 26 years 8 mons 12 days  | 26 years 8 mons 12 days
 13 years 6 mons 31 days  | 13 years 7 mons 1 day
 27 years 11 mons 17 days | 27 years 11 mons 17 days
 27 years 11 mons 17 days | 27 years 11 mons 17 days
 23 years 5 mons 20 days  | 23 years 5 mons 20 days
 14 years 7 mons 12 days  | 14 years 7 mons 12 days
 4 years 10 mons 11 days  | 4 years 10 mons 11 days
 21 years 31 days         | 21 years 1 mon 1 day
 23 years 10 mons 25 days | 23 years 10 mons 25 days
(12 rows)
{nformat}

{noformat}
0: jdbc:drill:schema=dfs.tmp> SELECT t1.col_intrvl_day , t2.col_intrvl_day
. . . . . . . . . . . . . . > FROM typeall_l t1 INNER JOIN  typeall_r t2
. . . . . . . . . . . . . . > ON  t1.col_intrvl_day= t2.col_intrvl_day;
+-----------------+------------------+
| col_intrvl_day  | col_intrvl_day0  |
+-----------------+------------------+
| P9DT33156S      | P9DT33156S       |
| P22DT25501S     | P22DT25501S      |
| P25DT10592S     | P25DT10592S      |
| P17DT30024S     | P17DT30024S      |
| P17DT30024S     | P17DT30024S      |
+-----------------+------------------+
5 rows selected (0.515 seconds)
{noformat}

Parquet schema details of the two files used in the join query

[root@centos-01 intrvl]# ~/parquet-tools/parquet-schema typeall_l/0_0_0.parquet
message root {
  optional int32 col_int;
  optional binary col_chr (UTF8);
  optional binary col_vrchr1 (UTF8);
  optional binary col_vrchr2 (UTF8);
  optional int32 col_dt (DATE);
  optional int32 col_tim (TIME_MILLIS);
  optional int64 col_tmstmp (TIMESTAMP_MILLIS);
  optional float col_flt;
  optional fixed_len_byte_array(12) col_intrvl_yr (INTERVAL);
  optional fixed_len_byte_array(12) col_intrvl_day (INTERVAL);
  optional boolean col_bln;
}

[root@centos-01 intrvl]# ~/parquet-tools/parquet-schema typeall_r/0_0_0.parquet
message root {
  optional int32 col_int;
  optional binary col_chr (UTF8);
  optional binary col_vrchr1 (UTF8);
  optional binary col_vrchr2 (UTF8);
  optional int32 col_dt (DATE);
  optional int32 col_tim (TIME_MILLIS);
  optional int64 col_tmstmp (TIMESTAMP_MILLIS);
  optional float col_flt;
  optional fixed_len_byte_array(12) col_intrvl_yr (INTERVAL);
  optional fixed_len_byte_array(12) col_intrvl_day (INTERVAL);
  optional boolean col_bln;
}


> wrong results - equijoin involving interval day type columns
> ------------------------------------------------------------
>
>                 Key: DRILL-5064
>                 URL: https://issues.apache.org/jira/browse/DRILL-5064
>             Project: Apache Drill
>          Issue Type: Bug
>          Components: Execution - Flow
>    Affects Versions: 1.9.0
>            Reporter: Khurram Faraaz
>         Attachments: typeall_l.parquet, typeall_r.parquet
>
>
> The problem (i.e., a difference in results between Postgres and Drill) 
> exists on Drill 1.9.0, git commit ID 4312d65, even without the nullability 
> check ... OR ( t1.col_intrvl_day IS NULL AND t2.col_intrvl_day IS NULL ). 
> A simple equi-join involving interval type columns returns wrong results: 
> Postgres returns 12 rows, whereas Drill returns only 5.
> {noformat}
> postgres=# SELECT t1.col_intrvl_dy , t2.col_intrvl_dy
> postgres-# FROM typeall_l t1 INNER JOIN  typeall_r t2
> postgres-# ON  t1.col_intrvl_dy= t2.col_intrvl_dy;
>       col_intrvl_dy       |      col_intrvl_dy
> --------------------------+--------------------------
>  9 years 10 mons 9 days   | 9 years 10 mons 9 days
>  25 years 11 mons 22 days | 25 years 11 mons 22 days
>  23 years 10 mons 25 days | 23 years 10 mons 25 days
>  26 years 8 mons 12 days  | 26 years 8 mons 12 days
>  13 years 6 mons 31 days  | 13 years 7 mons 1 day
>  27 years 11 mons 17 days | 27 years 11 mons 17 days
>  27 years 11 mons 17 days | 27 years 11 mons 17 days
>  23 years 5 mons 20 days  | 23 years 5 mons 20 days
>  14 years 7 mons 12 days  | 14 years 7 mons 12 days
>  4 years 10 mons 11 days  | 4 years 10 mons 11 days
>  21 years 31 days         | 21 years 1 mon 1 day
>  23 years 10 mons 25 days | 23 years 10 mons 25 days
> (12 rows)
> {noformat}
> {noformat}
> 0: jdbc:drill:schema=dfs.tmp> SELECT t1.col_intrvl_day , t2.col_intrvl_day
> . . . . . . . . . . . . . . > FROM typeall_l t1 INNER JOIN  typeall_r t2
> . . . . . . . . . . . . . . > ON  t1.col_intrvl_day= t2.col_intrvl_day;
> +-----------------+------------------+
> | col_intrvl_day  | col_intrvl_day0  |
> +-----------------+------------------+
> | P9DT33156S      | P9DT33156S       |
> | P22DT25501S     | P22DT25501S      |
> | P25DT10592S     | P25DT10592S      |
> | P17DT30024S     | P17DT30024S      |
> | P17DT30024S     | P17DT30024S      |
> +-----------------+------------------+
> 5 rows selected (0.515 seconds)
> {noformat}
> Parquet schema details of the two files used in the join query:
> {noformat}
> [root@centos-01 intrvl]# ~/parquet-tools/parquet-schema 
> typeall_l/0_0_0.parquet
> message root {
>   optional int32 col_int;
>   optional binary col_chr (UTF8);
>   optional binary col_vrchr1 (UTF8);
>   optional binary col_vrchr2 (UTF8);
>   optional int32 col_dt (DATE);
>   optional int32 col_tim (TIME_MILLIS);
>   optional int64 col_tmstmp (TIMESTAMP_MILLIS);
>   optional float col_flt;
>   optional fixed_len_byte_array(12) col_intrvl_yr (INTERVAL);
>   optional fixed_len_byte_array(12) col_intrvl_day (INTERVAL);
>   optional boolean col_bln;
> }
> [root@centos-01 intrvl]# ~/parquet-tools/parquet-schema 
> typeall_r/0_0_0.parquet
> message root {
>   optional int32 col_int;
>   optional binary col_chr (UTF8);
>   optional binary col_vrchr1 (UTF8);
>   optional binary col_vrchr2 (UTF8);
>   optional int32 col_dt (DATE);
>   optional int32 col_tim (TIME_MILLIS);
>   optional int64 col_tmstmp (TIMESTAMP_MILLIS);
>   optional float col_flt;
>   optional fixed_len_byte_array(12) col_intrvl_yr (INTERVAL);
>   optional fixed_len_byte_array(12) col_intrvl_day (INTERVAL);
>   optional boolean col_bln;
> }
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to