[
https://issues.apache.org/jira/browse/HIVE-7787?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14137401#comment-14137401
]
Svend Vanderveken commented on HIVE-7787:
-----------------------------------------
I encountered a very similar issue when importing data from a Hive external table
in raw CSV format into a Parquet table on CDH 5.1:
{code}
create external table if not exists testsv.objects_raw (
objectid string,
model string,
owner string,
attributes map<string,string>)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE
location '/test/somefolder';
{code}
(load some data in CSV format into /test/somefolder)
{code}
create table if not exists testsv.objects (
objectid string,
model string,
owner string,
attributes map<string,string>)
ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe'
STORED AS
INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat'
OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat';
{code}
{code}
insert overwrite table testsv.objects select source.* from testsv.objects_raw
source;
{code}
{code}
2014-09-17 10:58:39,436 Stage-3 map = 100%, reduce = 0%
Ended Job = job_1410534905977_0011 with errors
Error during job, obtaining debugging information...
Examining task ID: task_1410534905977_0011_m_000000 (and more) from job
job_1410534905977_0011
Task with the most failures(4):
-----
Task ID:
task_1410534905977_0011_m_000000
URL:
http://vm28-hulk-priv:8088/taskdetails.jsp?jobid=job_1410534905977_0011&tipid=task_1410534905977_0011_m_000000
-----
Diagnostic Messages for this Task:
Error: java.io.IOException: java.lang.reflect.InvocationTargetException
at
org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97)
at
org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57)
at
org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileRecordReader.initNextRecordReader(HadoopShimsSecure.java:346)
at
org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileRecordReader.<init>(HadoopShimsSecure.java:293)
at
org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileInputFormatShim.getRecordReader(HadoopShimsSecure.java:407)
at
org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getRecordReader(CombineHiveInputFormat.java:560)
at
org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:168)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:409)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:342)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:167)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1554)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:162)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
at
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
at
org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileRecordReader.initNextRecordReader(HadoopShimsSecure.java:332)
... 11 more
Caused by: java.lang.NoSuchFieldError: DECIMAL
at
org.apache.hadoop.hive.ql.io.parquet.convert.ETypeConverter.getNewConverter(ETypeConverter.java:146)
at
org.apache.hadoop.hive.ql.io.parquet.convert.HiveGroupConverter.getConverterFromDescription(HiveGroupConverter.java:31)
at
org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableGroupConverter.<init>(DataWritableGroupConverter.java:64)
at
org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableGroupConverter.<init>(DataWritableGroupConverter.java:40)
at
org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableRecordConverter.<init>(DataWritableRecordConverter.java:32)
at
org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport.prepareForRead(DataWritableReadSupport.java:128)
at
parquet.hadoop.InternalParquetRecordReader.initialize(InternalParquetRecordReader.java:142)
at
parquet.hadoop.ParquetRecordReader.initializeInternalReader(ParquetRecordReader.java:118)
at
parquet.hadoop.ParquetRecordReader.initialize(ParquetRecordReader.java:107)
at
org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper.<init>(ParquetRecordReaderWrapper.java:92)
at
org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper.<init>(ParquetRecordReaderWrapper.java:66)
at
org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat.getRecordReader(MapredParquetInputFormat.java:51)
at
org.apache.hadoop.hive.ql.io.CombineHiveRecordReader.<init>(CombineHiveRecordReader.java:65)
... 16 more
{code}
> Reading Parquet file with enum in Thrift Encoding throws NoSuchFieldError
> -------------------------------------------------------------------------
>
> Key: HIVE-7787
> URL: https://issues.apache.org/jira/browse/HIVE-7787
> Project: Hive
> Issue Type: Bug
> Components: Database/Schema, Thrift API
> Affects Versions: 0.12.0, 0.13.0, 0.12.1, 0.14.0, 0.13.1
> Environment: Hive 0.12 CDH 5.1.0, Hadoop 2.3.0 CDH 5.1.0
> Reporter: Raymond Lau
> Priority: Minor
>
> When reading a Parquet file where the original Thrift schema contains a struct
> with an enum, the following error occurs (full stack trace below):
> {code}
> java.lang.NoSuchFieldError: DECIMAL.
> {code}
> Example Thrift Schema:
> {code}
> enum MyEnumType {
> EnumOne,
> EnumTwo,
> EnumThree
> }
> struct MyStruct {
> 1: optional MyEnumType myEnumType;
> 2: optional string field2;
> 3: optional string field3;
> }
> struct outerStruct {
> 1: optional list<MyStruct> myStructs
> }
> {code}
> Hive Table:
> {code}
> CREATE EXTERNAL TABLE mytable (
> mystructs array<struct<myenumtype: string, field2: string, field3: string>>
> )
> ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe'
> STORED AS
> INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat'
> OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat'
> ;
> {code}
> Error Stack trace:
> {code}
> Java stack trace for Hive 0.12:
> Caused by: java.lang.NoSuchFieldError: DECIMAL
> at
> org.apache.hadoop.hive.ql.io.parquet.convert.ETypeConverter.getNewConverter(ETypeConverter.java:146)
> at
> org.apache.hadoop.hive.ql.io.parquet.convert.HiveGroupConverter.getConverterFromDescription(HiveGroupConverter.java:31)
> at
> org.apache.hadoop.hive.ql.io.parquet.convert.ArrayWritableGroupConverter.<init>(ArrayWritableGroupConverter.java:45)
> at
> org.apache.hadoop.hive.ql.io.parquet.convert.HiveGroupConverter.getConverterFromDescription(HiveGroupConverter.java:34)
> at
> org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableGroupConverter.<init>(DataWritableGroupConverter.java:64)
> at
> org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableGroupConverter.<init>(DataWritableGroupConverter.java:47)
> at
> org.apache.hadoop.hive.ql.io.parquet.convert.HiveGroupConverter.getConverterFromDescription(HiveGroupConverter.java:36)
> at
> org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableGroupConverter.<init>(DataWritableGroupConverter.java:64)
> at
> org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableGroupConverter.<init>(DataWritableGroupConverter.java:40)
> at
> org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableRecordConverter.<init>(DataWritableRecordConverter.java:32)
> at
> org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport.prepareForRead(DataWritableReadSupport.java:128)
> at
> parquet.hadoop.InternalParquetRecordReader.initialize(InternalParquetRecordReader.java:142)
> at
> parquet.hadoop.ParquetRecordReader.initializeInternalReader(ParquetRecordReader.java:118)
> at
> parquet.hadoop.ParquetRecordReader.initialize(ParquetRecordReader.java:107)
> at
> org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper.<init>(ParquetRecordReaderWrapper.java:92)
> at
> org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper.<init>(ParquetRecordReaderWrapper.java:66)
> at
> org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat.getRecordReader(MapredParquetInputFormat.java:51)
> at
> org.apache.hadoop.hive.ql.io.CombineHiveRecordReader.<init>(CombineHiveRecordReader.java:65)
> ... 16 more
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)