tooptoop4 opened a new issue #1730:
URL: https://github.com/apache/hudi/issues/1730


   ```
   using hoodie 0.4.6 and spark 2.3.4
   
   run below in hiveserver2 (v2.3.4):
   
   CREATE EXTERNAL TABLE `someschema.mytbl`(
   col1 string,
   col2 string,
   col3 string)
   PARTITIONED BY ( 
     `mydate` string)
   ROW FORMAT SERDE 
     'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' 
   STORED AS INPUTFORMAT 
     'com.uber.hoodie.hadoop.HoodieInputFormat' 
   OUTPUTFORMAT 
     'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
   LOCATION
     's3a://redact/M5/table/mytbl'
     
     #use spark to create COW hudi parquet under s3://redact/M5/table/mytbl/2016/11/07/ and s3://redact/M/table/mytbl/2019/12/01/
     
     run below in hiveserver2:
     ALTER TABLE someschema.mytbl ADD IF NOT EXISTS PARTITION(mydate='2016-11-07')
   LOCATION 's3a://redact/M5/table/mytbl/2016/11/07/'
   ALTER TABLE someschema.mytbl ADD IF NOT EXISTS PARTITION(mydate='2019-12-01')
   LOCATION 's3a://redact/M/table/mytbl/2019/12/01/'
     
     
     hive metastore shows below 2 rows:
     
     select TBLS.TBL_NAME,PARTITIONS.PART_NAME,SDS.LOCATION
   from SDS,TBLS,PARTITIONS
   where PARTITIONS.SD_ID = SDS.SD_ID
   and TBLS.TBL_ID=PARTITIONS.TBL_ID
   and TBLS.TBL_NAME = 'mytbl'
   order by 1,2;
   
   
   mytbl        mydate=2016-11-07       s3a://redact/M5/table/mytbl/2016/11/07
   mytbl        mydate=2019-12-01       s3a://redact/M/table/mytbl/2019/12/01
   
   
   
   
   
   query1:
   select count(1) from someschema.mytbl where datestr = '2016-11-07'
   
   works fine from both hiveserver2 and presto
   
   query2:
   select count(1) from someschema.mytbl where datestr = '2019-12-01'
   
   presto gives unhelpful error:
   
   io.prestosql.spi.PrestoException: HIVE_UNKNOWN_ERROR
        at io.prestosql.plugin.hive.BackgroundHiveSplitLoader$HiveSplitLoaderTask.process(BackgroundHiveSplitLoader.java:223)
        at io.prestosql.plugin.hive.util.ResumableTasks$1.run(ResumableTasks.java:38)
        at io.prestosql.$gen.Presto_ff748c3_dirty____20200610_171635_2.run(Unknown Source)
        at io.airlift.concurrent.BoundedExecutor.drainQueue(BoundedExecutor.java:78)
        at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
        at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
        at java.base/java.lang.Thread.run(Unknown Source)
   Caused by: java.lang.ArrayIndexOutOfBoundsException: undefined
   
   
   hiveserver2 gives more verbose yet still not too helpful error:
   2020-06-12T18:22:23,375  WARN [HiveServer2-Handler-Pool: Thread-12109] 
thrift.ThriftCLIService: Error fetching results:
   org.apache.hive.service.cli.HiveSQLException: java.io.IOException: 
java.lang.ArrayIndexOutOfBoundsException: 2
           at 
org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:499)
 ~[hive-service-2.3.4.jar:2.3.4]
           at 
org.apache.hive.service.cli.operation.OperationManager.getOperationNextRowSet(OperationManager.java:307)
 ~[hive-service-2.3.4.jar:2.3.4]
           at 
org.apache.hive.service.cli.session.HiveSessionImpl.fetchResults(HiveSessionImpl.java:878)
 ~[hive-service-2.3.4.jar:2.3.4]
           at sun.reflect.GeneratedMethodAccessor135.invoke(Unknown Source) 
~[?:?]
           at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 ~[?:1.8.0_252]
           at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_252]
           at 
org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78)
 ~[hive-service-2.3.4.jar:2.3.4]
           at 
org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:36)
 ~[hive-service-2.3.4.jar:2.3.4]
           at 
org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:63)
 ~[hive-service-2.3.4.jar:2.3.4]
           at java.security.AccessController.doPrivileged(Native Method) 
~[?:1.8.0_252]
           at javax.security.auth.Subject.doAs(Subject.java:422) ~[?:1.8.0_252]
           at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1844)
 ~[hadoop-common-2.8.5.jar:?]
           at 
org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:59)
 ~[hive-service-2.3.4.jar:2.3.4]
           at com.sun.proxy.$Proxy42.fetchResults(Unknown Source) ~[?:?]
           at 
org.apache.hive.service.cli.CLIService.fetchResults(CLIService.java:559) 
~[hive-service-2.3.4.jar:2.3.4]
           at 
org.apache.hive.service.cli.thrift.ThriftCLIService.FetchResults(ThriftCLIService.java:751)
 ~[hive-service-2.3.4.jar:2.3.4]
           at 
org.apache.hive.service.rpc.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1717)
 ~[hive-exec-2.3.4.jar:2.3.4]
           at 
org.apache.hive.service.rpc.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1702)
 ~[hive-exec-2.3.4.jar:2.3.4]
           at 
org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39) 
~[hive-exec-2.3.4.jar:2.3.4]
           at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39) 
~[hive-exec-2.3.4.jar:2.3.4]
           at 
org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
 ~[hive-service-2.3.4.jar:2.3.4]
           at 
org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
 ~[hive-exec-2.3.4.jar:2.3.4]
           at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 
[?:1.8.0_252]
           at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 
[?:1.8.0_252]
           at java.lang.Thread.run(Thread.java:748) [?:1.8.0_252]
   Caused by: java.io.IOException: java.lang.ArrayIndexOutOfBoundsException: 2
           at 
org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:521) 
~[hive-exec-2.3.4.jar:2.3.4]
           at 
org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:428) 
~[hive-exec-2.3.4.jar:2.3.4]
           at 
org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:147) 
~[hive-exec-2.3.4.jar:2.3.4]
           at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2208) 
~[hive-exec-2.3.4.jar:2.3.4]
           at 
org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:494)
 ~[hive-service-2.3.4.jar:2.3.4]
           ... 24 more
   Caused by: java.lang.ArrayIndexOutOfBoundsException: 2
           at 
com.uber.hoodie.common.util.FSUtils.getCommitTime(FSUtils.java:120) 
~[hoodiebundle.jar:?]
           at 
com.uber.hoodie.common.model.HoodieDataFile.getCommitTime(HoodieDataFile.java:37)
 ~[hoodiebundle.jar:?]
           at 
com.uber.hoodie.common.model.HoodieFileGroup.addDataFile(HoodieFileGroup.java:89)
 ~[hoodiebundle.jar:?]
           at 
com.uber.hoodie.common.table.view.HoodieTableFileSystemView.lambda$null$3(HoodieTableFileSystemView.java:155)
 ~[hoodiebundle.jar:?]
           at java.util.ArrayList.forEach(ArrayList.java:1257) ~[?:1.8.0_252]
           at 
com.uber.hoodie.common.table.view.HoodieTableFileSystemView.lambda$addFilesToView$5(HoodieTableFileSystemView.java:155)
 ~[hoodiebundle.jar:?]
           at java.lang.Iterable.forEach(Iterable.java:75) ~[?:1.8.0_252]
           at 
com.uber.hoodie.common.table.view.HoodieTableFileSystemView.addFilesToView(HoodieTableFileSystemView.java:151)
 ~[hoodiebundle.jar:?]
           at 
com.uber.hoodie.common.table.view.HoodieTableFileSystemView.<init>(HoodieTableFileSystemView.java:107)
 ~[hoodiebundle.jar:?]
           at 
com.uber.hoodie.hadoop.HoodieInputFormat.listStatus(HoodieInputFormat.java:88) 
~[hoodiebundle.jar:?]
           at 
org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:322) 
~[hadoop-mapreduce-client-core-2.8.5.jar:?]
           at 
org.apache.hadoop.hive.ql.exec.FetchOperator.getNextSplits(FetchOperator.java:372)
 ~[hive-exec-2.3.4.jar:2.3.4]
           at 
org.apache.hadoop.hive.ql.exec.FetchOperator.getRecordReader(FetchOperator.java:304)
 ~[hive-exec-2.3.4.jar:2.3.4]
           at 
org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:459) 
~[hive-exec-2.3.4.jar:2.3.4]
           at 
org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:428) 
~[hive-exec-2.3.4.jar:2.3.4]
           at 
org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:147) 
~[hive-exec-2.3.4.jar:2.3.4]
           at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:2208) 
~[hive-exec-2.3.4.jar:2.3.4]
           at 
org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:494)
 ~[hive-service-2.3.4.jar:2.3.4]
           ... 24 more
   
   ```


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to