[
https://issues.apache.org/jira/browse/DRILL-4141?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Rahul Challapalli updated DRILL-4141:
-------------------------------------
Attachment: fewtypes_null.tbl.gz
{code}
-- Staging table: an external Hive table over the pipe-delimited text data at
-- the LOCATION below (the .gz suffix suggests it is gzip-compressed).
-- time_col and interval_col are declared as STRING rather than a temporal type.
-- serialization.null.format makes the literal text "null" read back as NULL.
DROP TABLE IF EXISTS fewtypes_null_compressed_gz;

CREATE EXTERNAL TABLE fewtypes_null_compressed_gz (
    int_col        INT,
    bigint_col     BIGINT,
    date_col       DATE,
    time_col       STRING,
    timestamp_col  TIMESTAMP,
    interval_col   STRING,
    varchar_col    STRING,
    float_col      FLOAT,
    double_col     DOUBLE,
    bool_col       BOOLEAN
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY "|"
LOCATION '/drill/testdata/hive_storage/fewtypes_null.tbl.gz'
TBLPROPERTIES ("serialization.null.format"="null");
-- Target table: same ten columns as fewtypes_null_compressed_gz, but stored as
-- RCFile at its own external location. It is created empty here; the data is
-- loaded by a separate INSERT OVERWRITE.
DROP TABLE IF EXISTS fewtypes_null_compressed_rc_snappy;

CREATE EXTERNAL TABLE fewtypes_null_compressed_rc_snappy (
    int_col        INT,
    bigint_col     BIGINT,
    date_col       DATE,
    time_col       STRING,
    timestamp_col  TIMESTAMP,
    interval_col   STRING,
    varchar_col    STRING,
    float_col      FLOAT,
    double_col     DOUBLE,
    bool_col       BOOLEAN
)
STORED AS RCFILE
LOCATION '/drill/testdata/hive_storage/fewtypes_null_rc_snappy';
-- Session settings for the INSERT below: compress the job output, at BLOCK
-- granularity, with the Snappy codec.
-- Fix: the codec assignment originally read "SET SET <property>=...", so the
-- duplicated keyword became part of the property name and the Snappy codec was
-- not assigned to mapred.output.compression.codec.
SET hive.exec.compress.output=true;
SET mapred.output.compression.type=BLOCK;
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;

-- Rewrite the text staging data into the RCFile table, replacing any existing
-- contents. Columns are listed explicitly (same order as both table
-- definitions) so the load does not depend on SELECT * column ordering.
INSERT OVERWRITE TABLE fewtypes_null_compressed_rc_snappy
SELECT
    int_col,
    bigint_col,
    date_col,
    time_col,
    timestamp_col,
    interval_col,
    varchar_col,
    float_col,
    double_col,
    bool_col
FROM fewtypes_null_compressed_gz;
{code}
> Hive Plugin : Timestamp value in an RCfile (Snappy compression) is wrongly
> interpreted
> ----------------------------------------------------------------------------------------
>
> Key: DRILL-4141
> URL: https://issues.apache.org/jira/browse/DRILL-4141
> Project: Apache Drill
> Issue Type: Bug
> Components: Storage - Hive
> Reporter: Rahul Challapalli
> Priority: Critical
> Attachments: fewtypes_null.tbl.gz
>
>
> git.commit.id.abbrev=f7a0d38
> The query below should have returned "1996-02-28 17:32:01.0". However, it
> returns an incorrect value (the timestamp is shifted forward by 8 hours):
> {code}
> select timestamp_col from hive.fewtypes_null_compressed_rc_snappy where
> int_col=20;
> +------------------------+
> | timestamp_col |
> +------------------------+
> | 1996-02-29 01:32:01.0 |
> +------------------------+
> 1 row selected (0.715 seconds)
> {code}
> This is not related to the timezone of the sqlline client: the query below,
> run from the same client, returns the correct result. The only difference is
> that this time the data is stored in a sequence file with Snappy compression.
> {code}
> select timestamp_col from hive.fewtypes_null_compressed_seq_snappy where
> int_col=20;
> +------------------------+
> | timestamp_col |
> +------------------------+
> | 1996-02-28 17:32:01.0 |
> +------------------------+
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)