[ 
https://issues.apache.org/jira/browse/TEZ-1912?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Hitesh Shah updated TEZ-1912:
-----------------------------
    Fix Version/s: 0.7.0

> Merge exceptions are thrown when enabling 
> tez.runtime.shuffle.memory-to-memory.enable && 
> tez.runtime.shuffle.memory-to-memory.segments
> --------------------------------------------------------------------------------------------------------------------------------------
>
>                 Key: TEZ-1912
>                 URL: https://issues.apache.org/jira/browse/TEZ-1912
>             Project: Apache Tez
>          Issue Type: Bug
>            Reporter: Rajesh Balamohan
>             Fix For: 0.7.0
>
>         Attachments: TEZ-1912.1.patch
>
>
> Merge exceptions are thrown when running a Hive query on Tez with the 
> following settings. It works fine without the mem-to-mem merge settings.
> {code}
> 2015-01-04 20:04:01,371 ERROR [ShuffleAndMergeRunner [Map_1]] 
> orderedgrouped.Shuffle: ShuffleRunner failed with error
> org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$ShuffleError:
>  Error while doing final merge
>         at 
> org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.call(Shuffle.java:364)
>         at 
> org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.call(Shuffle.java:327)
>         at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>         at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>         at java.lang.Thread.run(Thread.java:745)
> Caused by: java.io.IOException: Rec# 22630125: Negative value-length: -1
>         at 
> org.apache.tez.runtime.library.common.sort.impl.IFile$Reader.positionToNextRecord(IFile.java:720)
>         at 
> org.apache.tez.runtime.library.common.shuffle.orderedgrouped.InMemoryReader.readRawKey(InMemoryReader.java:104)
>         at 
> org.apache.tez.runtime.library.common.sort.impl.TezMerger$Segment.readRawKey(TezMerger.java:329)
>         at 
> org.apache.tez.runtime.library.common.sort.impl.TezMerger$MergeQueue.adjustPriorityQueue(TezMerger.java:500)
>         at 
> org.apache.tez.runtime.library.common.sort.impl.TezMerger$MergeQueue.next(TezMerger.java:545)
>         at 
> org.apache.tez.runtime.library.common.sort.impl.TezMerger.writeFile(TezMerger.java:204)
>         at 
> org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager.finalMerge(MergeManager.java:862)
>         at 
> org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager.close(MergeManager.java:473)
>         at 
> org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.call(Shuffle.java:362)
>         ... 5 more
> {code}
> {code}
> $HIVE_HOME/bin/hive -hiveconf tez.runtime.io.sort.factor=200 --hiveconf 
> tez.shuffle-vertex-manager.min-src-fraction=1.0 --hiveconf 
> tez.shuffle-vertex-manager.max-src-fraction=1.0 --hiveconf 
> hive.tez.auto.reducer.parallelism=false --hiveconf 
> tez.am.heartbeat.interval-ms.max=20 --hiveconf tez.runtime.io.sort.mb=1200 
> --hiveconf tez.runtime.sort.threads=2 --hiveconf 
> tez.history.logging.service.class=org.apache.tez.dag.history.logging.impl.SimpleHistoryLoggingService
>  --hiveconf hive.tez.container.size=4096 --hiveconf 
> tez.runtime.shuffle.memory-to-memory.enable=true --hiveconf 
> tez.runtime.shuffle.memory-to-memory.segments=4
> --10 TB dataset
> use tpcds4_bin_partitioned_orc_10000;
> drop table testData;
> create table testData as select 
> ss_sold_date_sk,ss_sold_time_sk,ss_item_sk,ss_customer_sk,ss_quantity,ss_sold_date
>  from store_sales distribute by ss_sold_date;
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to