[ 
https://issues.apache.org/jira/browse/DRILL-4141?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Rahul Challapalli updated DRILL-4141:
-------------------------------------
    Attachment: fewtypes_null.tbl.gz

-- Source table: pipe-delimited text data read in place from the LOCATION
-- below. Recreated from scratch so the script can be re-run.
DROP TABLE IF EXISTS fewtypes_null_compressed_gz;

CREATE EXTERNAL TABLE fewtypes_null_compressed_gz (
    int_col       INT,
    bigint_col    BIGINT,
    date_col      DATE,
    time_col      STRING,     -- time values are kept as plain strings
    timestamp_col TIMESTAMP,
    interval_col  STRING,     -- interval values are kept as plain strings
    varchar_col   STRING,
    float_col     FLOAT,
    double_col    DOUBLE,
    bool_col      BOOLEAN
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY "|"
LOCATION '/drill/testdata/hive_storage/fewtypes_null.tbl.gz'
-- Fields containing the literal text "null" are read as SQL NULL.
TBLPROPERTIES ("serialization.null.format" = "null");

-- Target table: same column layout as fewtypes_null_compressed_gz, but
-- stored as RCFile. Recreated from scratch so the script can be re-run.
DROP TABLE IF EXISTS fewtypes_null_compressed_rc_snappy;

CREATE EXTERNAL TABLE fewtypes_null_compressed_rc_snappy (
    int_col       INT,
    bigint_col    BIGINT,
    date_col      DATE,
    time_col      STRING,     -- time values are kept as plain strings
    timestamp_col TIMESTAMP,  -- the column whose value DRILL-4141 reports as misread
    interval_col  STRING,     -- interval values are kept as plain strings
    varchar_col   STRING,
    float_col     FLOAT,
    double_col    DOUBLE,
    bool_col      BOOLEAN
)
STORED AS RCFILE
LOCATION '/drill/testdata/hive_storage/fewtypes_null_rc_snappy';

-- Enable block-level Snappy compression for the output written by the
-- INSERT below. Each SET must be a single "SET key=value;" statement.
SET hive.exec.compress.output=true;
SET mapred.output.compression.type=BLOCK;
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;

-- Replace the contents of the RCFile table with every row of the
-- delimited-text table. Columns are listed explicitly, in the order both
-- tables declare them, so the load does not depend on SELECT * expansion.
INSERT OVERWRITE TABLE fewtypes_null_compressed_rc_snappy
SELECT
    int_col,
    bigint_col,
    date_col,
    time_col,
    timestamp_col,
    interval_col,
    varchar_col,
    float_col,
    double_col,
    bool_col
FROM fewtypes_null_compressed_gz;
{code}

> Hive Plugin :  Timestamp  value in an RCfile (Snappy compression) is wrongly 
> interpreted
> ----------------------------------------------------------------------------------------
>
>                 Key: DRILL-4141
>                 URL: https://issues.apache.org/jira/browse/DRILL-4141
>             Project: Apache Drill
>          Issue Type: Bug
>          Components: Storage - Hive
>            Reporter: Rahul Challapalli
>            Priority: Critical
>         Attachments: fewtypes_null.tbl.gz
>
>
> git.commit.id.abbrev=f7a0d38
> The query below should have returned "1996-02-28 17:32:01.0". However, it 
> returns an incorrect value:
> {code}
> select timestamp_col from hive.fewtypes_null_compressed_rc_snappy where 
> int_col=20;
> +------------------------+
> |     timestamp_col      |
> +------------------------+
> | 1996-02-29 01:32:01.0  |
> +------------------------+
> 1 row selected (0.715 seconds)
> {code}
> This is not related to the timezone of the sqlline client, as the query below, 
> run from the same client, returns the correct result. In this case, however, 
> the data is stored in a sequence file with Snappy compression:
> {code}
> select timestamp_col from hive.fewtypes_null_compressed_seq_snappy where 
> int_col=20;
> +------------------------+
> |     timestamp_col      |
> +------------------------+
> | 1996-02-28 17:32:01.0  |
> +------------------------+
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to