[ 
https://issues.apache.org/jira/browse/HUDI-1286?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Raymond Xu updated HUDI-1286:
-----------------------------
    Component/s: dev-experience

> Merge On Read queries (_rt) fails on docker demo for test suite
> ---------------------------------------------------------------
>
>                 Key: HUDI-1286
>                 URL: https://issues.apache.org/jira/browse/HUDI-1286
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: dev-experience, Testing, tests-ci
>    Affects Versions: 0.9.0
>            Reporter: Nishith Agarwal
>            Assignee: Nishith Agarwal
>            Priority: Major
>             Fix For: 0.11.0
>
>
> When running the following query -> 
> {code:java}
> select count(*) from testdb.table1_rt
> {code}
> we see the following exception in HiveServer:
> {code:java}
> 2020-09-16T03:41:07,668 INFO  LocalJobRunner Map Task Executor #0: 
> realtime.AbstractRealtimeRecordReader 
> (AbstractRealtimeRecordReader.java:init(88)) - Writer Schema From Parquet => 
> [_hoodie_commit_time type:UNION pos:0, _hoodie_commit_seqno type:UNION pos:1, 
> _hoodie_record_key type:UNION pos:2, _hoodie_partition_path type:UNION pos:3, 
> _hoodie_file_name type:UNION pos:4, timestamp type:LONG pos:5, _row_key 
> type:STRING pos:6, rider type:STRING pos:7, driver type:STRING pos:8, 
> begin_lat type:DOUBLE pos:9, begin_lon type:DOUBLE pos:10, end_lat 
> type:DOUBLE pos:11, end_lon type:DOUBLE pos:12, fare type:DOUBLE 
> pos:13]2020-09-16T03:41:07,668 INFO  LocalJobRunner Map Task Executor #0: 
> realtime.AbstractRealtimeRecordReader 
> (AbstractRealtimeRecordReader.java:init(88)) - Writer Schema From Parquet => 
> [_hoodie_commit_time type:UNION pos:0, _hoodie_commit_seqno type:UNION pos:1, 
> _hoodie_record_key type:UNION pos:2, _hoodie_partition_path type:UNION pos:3, 
> _hoodie_file_name type:UNION pos:4, timestamp type:LONG pos:5, _row_key 
> type:STRING pos:6, rider type:STRING pos:7, driver type:STRING pos:8, 
> begin_lat type:DOUBLE pos:9, begin_lon type:DOUBLE pos:10, end_lat 
> type:DOUBLE pos:11, end_lon type:DOUBLE pos:12, fare type:DOUBLE 
> pos:13]2020-09-16T03:41:07,670 INFO  [Thread-465]: mapred.LocalJobRunner 
> (LocalJobRunner.java:runTasks(483)) - map task executor 
> complete.2020-09-16T03:41:07,671 WARN  [Thread-465]: mapred.LocalJobRunner 
> (LocalJobRunner.java:run(587)) - job_local242522391_0010java.lang.Exception: 
> java.io.IOException: org.apache.hudi.exception.HoodieException: Error 
> ordering fields for storage read. #fieldNames: 4, #fieldPositions: 5 at 
> org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:489) 
> ~[hadoop-mapreduce-client-common-2.8.4.jar:?] at 
> org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:549) 
> ~[hadoop-mapreduce-client-common-2.8.4.jar:?]Caused by: java.io.IOException: 
> org.apache.hudi.exception.HoodieException: Error ordering fields for storage 
> read. #fieldNames: 4, #fieldPositions: 5 at 
> org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97)
>  ~[hive-exec-2.3.3.jar:2.3.3] at 
> org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57)
>  ~[hive-exec-2.3.3.jar:2.3.3] at 
> org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:379)
>  ~[hive-exec-2.3.3.jar:2.3.3] at 
> org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:169) 
> ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at 
> org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:432) 
> ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at 
> org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) 
> ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at 
> org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:270)
>  ~[hadoop-mapreduce-client-common-2.8.4.jar:?] at 
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
> ~[?:1.8.0_212] at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
> ~[?:1.8.0_212] at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  [?:1.8.0_212] at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  [?:1.8.0_212] at java.lang.Thread.run(Thread.java:748) [?:1.8.0_212]Caused 
> by: org.apache.hudi.exception.HoodieException: Error ordering fields for 
> storage read. #fieldNames: 4, #fieldPositions: 5 at 
> org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils.orderFields(HoodieRealtimeRecordReaderUtils.java:258)
>  ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at 
> org.apache.hudi.hadoop.realtime.AbstractRealtimeRecordReader.init(AbstractRealtimeRecordReader.java:99)
>  ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at 
> org.apache.hudi.hadoop.realtime.AbstractRealtimeRecordReader.<init>(AbstractRealtimeRecordReader.java:67)
>  ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at 
> org.apache.hudi.hadoop.realtime.RealtimeCompactedRecordReader.<init>(RealtimeCompactedRecordReader.java:53)
>  ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at 
> org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader.constructRecordReader(HoodieRealtimeRecordReader.java:70)
>  ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at 
> org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader.<init>(HoodieRealtimeRecordReader.java:47)
>  ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at 
> org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat.getRecordReader(HoodieParquetRealtimeInputFormat.java:120)
>  ~[hoodie-hadoop-mr-bundle.jar:0.6.1-SNAPSHOT] at 
> org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:376)
>  ~[hive-exec-2.3.3.jar:2.3.3] at 
> org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:169) 
> ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at 
> org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:432) 
> ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at 
> org.apache.hadoop.mapred.MapTask.run(MapTask.java:343) 
> ~[hadoop-mapreduce-client-core-2.8.4.jar:?] at 
> org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:270)
>  ~[hadoop-mapreduce-client-common-2.8.4.jar:?] at 
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
> ~[?:1.8.0_212] at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
> ~[?:1.8.0_212] at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  ~[?:1.8.0_212] at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  ~[?:1.8.0_212] at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_212]{code}
>  
> This issue was previously seen while making HiveCombineInputFormat work with
> real-time tables; we are now seeing it again.



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

Reply via email to