[ 
https://issues.apache.org/jira/browse/HIVE-18393?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16329209#comment-16329209
 ] 

Janaki Lahorani commented on HIVE-18393:
----------------------------------------

[~ashutoshc], [~alangates], [~owen.omalley], [~sershe]: If a column type is 
changed into String, VarChar or Char, query on the table gives runtime error on 
Parquet tables.  With this fix, it will return the actual value if the original 
type was TimeStamp, Decimal, Double, Float, BigInt, Int, SmallInt, Tinyint or 
Boolean.  Please let me know if you see any issues with the fix.

> Error returned when some other type is read as string from parquet tables
> -------------------------------------------------------------------------
>
>                 Key: HIVE-18393
>                 URL: https://issues.apache.org/jira/browse/HIVE-18393
>             Project: Hive
>          Issue Type: Bug
>            Reporter: Janaki Lahorani
>            Assignee: Janaki Lahorani
>            Priority: Major
>             Fix For: 3.0.0
>
>         Attachments: HIVE-18393.1.patch, HIVE-18393.1.patch, 
> HIVE-18393.2.patch, HIVE-18393.3.patch, HIVE-18393.4.patch, HIVE-18393.4.patch
>
>
> TimeStamp, Decimal, Double, Float, BigInt, Int, SmallInt, Tinyint and Boolean 
> when read as String, Varchar or Char should return the correct data.  Now 
> this results in error for parquet tables.
> Test Case:
> {code}
> drop table if exists testAltCol;
> create table testAltCol
> (cId              TINYINT,
>  cTimeStamp TIMESTAMP,
>  cDecimal   DECIMAL(38,18),
>  cDouble    DOUBLE,
>  cFloat           FLOAT,
>  cBigInt    BIGINT,
>  cInt     INT,
>  cSmallInt  SMALLINT,
>  cTinyint   TINYINT,
>  cBoolean   BOOLEAN);
> insert into testAltCol values
> (1,
>  '2017-11-07 09:02:49.999999999',
>  12345678901234567890.123456789012345678,
>  1.79e308,
>  3.4e38,
>  1234567890123456789,
>  1234567890,
>  12345,
>  123,
>  TRUE);
> insert into testAltCol values
> (2,
>  '1400-01-01 01:01:01.000000001',
>  1.1,
>  2.2,
>  3.3,
>  1,
>  2,
>  3,
>  4,
>  FALSE);
> insert into testAltCol values
> (3,
>  '1400-01-01 01:01:01.000000001',
>  10.1,
>  20.2,
>  30.3,
>  1234567890123456789,
>  1234567890,
>  12345,
>  123,
>  TRUE);
> select cId, cTimeStamp from testAltCol order by cId;
> select cId, cDecimal, cDouble, cFloat from testAltCol order by cId;
> select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltCol order by cId;
> select cId, cBoolean from testAltCol order by cId;
> drop table if exists testAltColP;
> create table testAltColP stored as parquet as select * from testAltCol;
> select cId, cTimeStamp from testAltColP order by cId;
> select cId, cDecimal, cDouble, cFloat from testAltColP order by cId;
> select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColP order by cId;
> select cId, cBoolean from testAltColP order by cId;
> alter table testAltColP replace columns
> (cId              TINYINT,
>  cTimeStamp STRING,
>  cDecimal   STRING,
>  cDouble    STRING,
>  cFloat           STRING,
>  cBigInt    STRING,
>  cInt     STRING,
>  cSmallInt  STRING,
>  cTinyint   STRING,
>  cBoolean   STRING);
> select cId, cTimeStamp from testAltColP order by cId;
> select cId, cDecimal, cDouble, cFloat from testAltColP order by cId;
> select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColP order by cId;
> select cId, cBoolean from testAltColP order by cId;
> alter table testAltColP replace columns
> (cId              TINYINT,
>  cTimeStamp VARCHAR(100),
>  cDecimal   VARCHAR(100),
>  cDouble    VARCHAR(100),
>  cFloat           VARCHAR(100),
>  cBigInt    VARCHAR(100),
>  cInt     VARCHAR(100),
>  cSmallInt  VARCHAR(100),
>  cTinyint   VARCHAR(100),
>  cBoolean   VARCHAR(100));
> select cId, cTimeStamp from testAltColP order by cId;
> select cId, cDecimal, cDouble, cFloat from testAltColP order by cId;
> select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColP order by cId;
> select cId, cBoolean from testAltColP order by cId;
> alter table testAltColP replace columns
> (cId              TINYINT,
>  cTimeStamp CHAR(100),
>  cDecimal   CHAR(100),
>  cDouble    CHAR(100),
>  cFloat           CHAR(100),
>  cBigInt    CHAR(100),
>  cInt     CHAR(100),
>  cSmallInt  CHAR(100),
>  cTinyint   CHAR(100),
>  cBoolean   CHAR(100));
> select cId, cTimeStamp from testAltColP order by cId;
> select cId, cDecimal, cDouble, cFloat from testAltColP order by cId;
> select cId, cBigInt, cInt, cSmallInt, cTinyint from testAltColP order by cId;
> select cId, cBoolean from testAltColP order by cId;
> drop table if exists testAltColP;
> {code}
> {code}
> Error:
> FAILED: Execution Error, return code 2 from 
> org.apache.hadoop.hive.ql.exec.mr.MapRedTask
> Excerpt for log:
> 2018-01-05T15:54:05,756 ERROR [LocalJobRunner Map Task Executor #0] 
> mr.ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime 
> Error while processing row [Error getting row data with exception 
> java.lang.UnsupportedOperationException: Cannot inspect 
> org.apache.hadoop.hive.serde2.io.TimestampWritable
>       at 
> org.apache.hadoop.hive.ql.io.parquet.serde.primitive.ParquetStringInspector.getPrimitiveJavaObject(ParquetStringInspector.java:77)
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to