[ 
https://issues.apache.org/jira/browse/TEZ-1807?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Tsuyoshi OZAWA updated TEZ-1807:
--------------------------------
    Description: 
I ran Hive 0.14 on Tez 0.5.2.  The input data is 100 GB of text generated by 
RandomTextWriter. 

{code}
create external table randomText100GB(
  text string 
) location 'hdfs:///user/ozawa/randomText100GB'; 

CREATE TABLE wordcount AS
SELECT word, count(1) AS count 
FROM (SELECT 
EXPLODE(SPLIT(LCASE(REGEXP_REPLACE(text,'[\\p{Punct},\\p{Cntrl}]','')),' '))
AS word FROM randomText100GB) words
GROUP BY word;
{code}

As a result, an exception is thrown:

{quote}
--------------------------------------------------------------------------------
        VERTICES      STATUS  TOTAL  COMPLETED  RUNNING  PENDING  FAILED  KILLED
--------------------------------------------------------------------------------
Map 1 .........       KILLED    115        104        0       11       0      11
Reducer 2             FAILED      3          0        0        3       1       2
--------------------------------------------------------------------------------
VERTICES: 00/02  [======================>>----] 88%   ELAPSED TIME: 108.81 s   
--------------------------------------------------------------------------------
Status: Failed
Vertex failed, vertexName=Reducer 2, vertexId=vertex_1417036912823_0071_1_01, 
diagnostics=[Task failed, taskId=task_1417036912823_0071_1_01_000000, 
diagnostics=[TaskAttempt 0 failed, info=[Error: 
exceptionThrown=org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$ShuffleError:
 error in shuffle in MemToMemMerger [Map_1]
        at 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.call(Shuffle.java:338)
        at 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.call(Shuffle.java:319)
        at java.util.concurrent.FutureTask.run(FutureTask.java:262)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:745)
Caused by: java.io.EOFException: Reach the limit of the buffer
        at 
org.apache.hadoop.io.BoundedByteArrayOutputStream.write(BoundedByteArrayOutputStream.java:92)
        at 
org.apache.tez.runtime.library.common.sort.impl.IFileOutputStream.write(IFileOutputStream.java:120)
        at java.io.DataOutputStream.write(DataOutputStream.java:107)
        at 
org.apache.tez.runtime.library.common.sort.impl.IFile$Writer.writeKVPair(IFile.java:420)
        at 
org.apache.tez.runtime.library.common.sort.impl.IFile$Writer.append(IFile.java:390)
        at 
org.apache.tez.runtime.library.common.sort.impl.TezMerger.writeFile(TezMerger.java:203)
        at 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager$IntermediateMemoryToMemoryMerger.merge(MergeManager.java:527)
        at 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeThread.run(MergeThread.java:89)
, errorMessage=Shuffle Runner 
Failed:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$ShuffleError:
 error in shuffle in MemToMemMerger [Map_1]
        at 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.call(Shuffle.java:338)
        at 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.call(Shuffle.java:319)
        at java.util.concurrent.FutureTask.run(FutureTask.java:262)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:745)
Caused by: java.io.EOFException: Reach the limit of the buffer
        at 
org.apache.hadoop.io.BoundedByteArrayOutputStream.write(BoundedByteArrayOutputStream.java:92)
        at 
org.apache.tez.runtime.library.common.sort.impl.IFileOutputStream.write(IFileOutputStream.java:120)
        at java.io.DataOutputStream.write(DataOutputStream.java:107)
        at 
org.apache.tez.runtime.library.common.sort.impl.IFile$Writer.writeKVPair(IFile.java:420)
        at 
org.apache.tez.runtime.library.common.sort.impl.IFile$Writer.append(IFile.java:390)
        at 
org.apache.tez.runtime.library.common.sort.impl.TezMerger.writeFile(TezMerger.java:203)
        at 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager$IntermediateMemoryToMemoryMerger.merge(MergeManager.java:527)
        at 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeThread.run(MergeThread.java:89)
]], Vertex failed as one or more tasks failed. failedTasks:1, Vertex 
vertex_1417036912823_0071_1_01 [Reducer 2] killed/failed due to:null]
Vertex killed, vertexName=Map 1, vertexId=vertex_1417036912823_0071_1_00, 
diagnostics=[Vertex received Kill while in RUNNING state., Vertex killed as 
other vertex failed. failedTasks:0, Vertex vertex_1417036912823_0071_1_00 [Map 
1] killed/failed due to:null]
DAG failed due to vertex failure. failedVertices:1 killedVertices:1
{quote}

  was:
I ran Hive 0.14 on Tez 0.5.2 and master.  The input data is 100 GB of text 
generated by RandomTextWriter. 

{code}
create external table randomText100GB(
  text string 
) location 'hdfs:///user/ozawa/randomText100GB'; 

CREATE TABLE wordcount AS
SELECT word, count(1) AS count 
FROM (SELECT 
EXPLODE(SPLIT(LCASE(REGEXP_REPLACE(text,'[\\p{Punct},\\p{Cntrl}]','')),' '))
AS word FROM randomText100GB) words
GROUP BY word;
{code}

As a result, an exception is thrown:

{quote}
--------------------------------------------------------------------------------
        VERTICES      STATUS  TOTAL  COMPLETED  RUNNING  PENDING  FAILED  KILLED
--------------------------------------------------------------------------------
Map 1 .........       KILLED    115        104        0       11       0      11
Reducer 2             FAILED      3          0        0        3       1       2
--------------------------------------------------------------------------------
VERTICES: 00/02  [======================>>----] 88%   ELAPSED TIME: 108.81 s   
--------------------------------------------------------------------------------
Status: Failed
Vertex failed, vertexName=Reducer 2, vertexId=vertex_1417036912823_0071_1_01, 
diagnostics=[Task failed, taskId=task_1417036912823_0071_1_01_000000, 
diagnostics=[TaskAttempt 0 failed, info=[Error: 
exceptionThrown=org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$ShuffleError:
 error in shuffle in MemToMemMerger [Map_1]
        at 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.call(Shuffle.java:338)
        at 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.call(Shuffle.java:319)
        at java.util.concurrent.FutureTask.run(FutureTask.java:262)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:745)
Caused by: java.io.EOFException: Reach the limit of the buffer
        at 
org.apache.hadoop.io.BoundedByteArrayOutputStream.write(BoundedByteArrayOutputStream.java:92)
        at 
org.apache.tez.runtime.library.common.sort.impl.IFileOutputStream.write(IFileOutputStream.java:120)
        at java.io.DataOutputStream.write(DataOutputStream.java:107)
        at 
org.apache.tez.runtime.library.common.sort.impl.IFile$Writer.writeKVPair(IFile.java:420)
        at 
org.apache.tez.runtime.library.common.sort.impl.IFile$Writer.append(IFile.java:390)
        at 
org.apache.tez.runtime.library.common.sort.impl.TezMerger.writeFile(TezMerger.java:203)
        at 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager$IntermediateMemoryToMemoryMerger.merge(MergeManager.java:527)
        at 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeThread.run(MergeThread.java:89)
, errorMessage=Shuffle Runner 
Failed:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$ShuffleError:
 error in shuffle in MemToMemMerger [Map_1]
        at 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.call(Shuffle.java:338)
        at 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.call(Shuffle.java:319)
        at java.util.concurrent.FutureTask.run(FutureTask.java:262)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:745)
Caused by: java.io.EOFException: Reach the limit of the buffer
        at 
org.apache.hadoop.io.BoundedByteArrayOutputStream.write(BoundedByteArrayOutputStream.java:92)
        at 
org.apache.tez.runtime.library.common.sort.impl.IFileOutputStream.write(IFileOutputStream.java:120)
        at java.io.DataOutputStream.write(DataOutputStream.java:107)
        at 
org.apache.tez.runtime.library.common.sort.impl.IFile$Writer.writeKVPair(IFile.java:420)
        at 
org.apache.tez.runtime.library.common.sort.impl.IFile$Writer.append(IFile.java:390)
        at 
org.apache.tez.runtime.library.common.sort.impl.TezMerger.writeFile(TezMerger.java:203)
        at 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager$IntermediateMemoryToMemoryMerger.merge(MergeManager.java:527)
        at 
org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeThread.run(MergeThread.java:89)
]], Vertex failed as one or more tasks failed. failedTasks:1, Vertex 
vertex_1417036912823_0071_1_01 [Reducer 2] killed/failed due to:null]
Vertex killed, vertexName=Map 1, vertexId=vertex_1417036912823_0071_1_00, 
diagnostics=[Vertex received Kill while in RUNNING state., Vertex killed as 
other vertex failed. failedTasks:0, Vertex vertex_1417036912823_0071_1_00 [Map 
1] killed/failed due to:null]
DAG failed due to vertex failure. failedVertices:1 killedVertices:1
{quote}


> BoundedByteArrayOutputStream throws EOFException because of "Reach the limit 
> of the buffer"
> ------------------------------------------------------------------------------------------
>
>                 Key: TEZ-1807
>                 URL: https://issues.apache.org/jira/browse/TEZ-1807
>             Project: Apache Tez
>          Issue Type: Improvement
>    Affects Versions: 0.6.0, 0.5.2
>            Reporter: Tsuyoshi OZAWA
>
> I ran Hive 0.14 on Tez 0.5.2.  The input data is 100 GB of text generated by 
> RandomTextWriter. 
> {code}
> create external table randomText100GB(
>   text string 
> ) location 'hdfs:///user/ozawa/randomText100GB'; 
> CREATE TABLE wordcount AS
> SELECT word, count(1) AS count 
> FROM (SELECT 
> EXPLODE(SPLIT(LCASE(REGEXP_REPLACE(text,'[\\p{Punct},\\p{Cntrl}]','')),' '))
> AS word FROM randomText100GB) words
> GROUP BY word;
> {code}
> As a result, an exception is thrown:
> {quote}
> --------------------------------------------------------------------------------
>         VERTICES      STATUS  TOTAL  COMPLETED  RUNNING  PENDING  FAILED  
> KILLED
> --------------------------------------------------------------------------------
> Map 1 .........       KILLED    115        104        0       11       0      
> 11
> Reducer 2             FAILED      3          0        0        3       1      
>  2
> --------------------------------------------------------------------------------
> VERTICES: 00/02  [======================>>----] 88%   ELAPSED TIME: 108.81 s  
>  
> --------------------------------------------------------------------------------
> Status: Failed
> Vertex failed, vertexName=Reducer 2, vertexId=vertex_1417036912823_0071_1_01, 
> diagnostics=[Task failed, taskId=task_1417036912823_0071_1_01_000000, 
> diagnostics=[TaskAttempt 0 failed, info=[Error: 
> exceptionThrown=org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$ShuffleError:
>  error in shuffle in MemToMemMerger [Map_1]
>         at 
> org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.call(Shuffle.java:338)
>         at 
> org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.call(Shuffle.java:319)
>         at java.util.concurrent.FutureTask.run(FutureTask.java:262)
>         at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>         at java.lang.Thread.run(Thread.java:745)
> Caused by: java.io.EOFException: Reach the limit of the buffer
>         at 
> org.apache.hadoop.io.BoundedByteArrayOutputStream.write(BoundedByteArrayOutputStream.java:92)
>         at 
> org.apache.tez.runtime.library.common.sort.impl.IFileOutputStream.write(IFileOutputStream.java:120)
>         at java.io.DataOutputStream.write(DataOutputStream.java:107)
>         at 
> org.apache.tez.runtime.library.common.sort.impl.IFile$Writer.writeKVPair(IFile.java:420)
>         at 
> org.apache.tez.runtime.library.common.sort.impl.IFile$Writer.append(IFile.java:390)
>         at 
> org.apache.tez.runtime.library.common.sort.impl.TezMerger.writeFile(TezMerger.java:203)
>         at 
> org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager$IntermediateMemoryToMemoryMerger.merge(MergeManager.java:527)
>         at 
> org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeThread.run(MergeThread.java:89)
> , errorMessage=Shuffle Runner 
> Failed:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$ShuffleError:
>  error in shuffle in MemToMemMerger [Map_1]
>         at 
> org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.call(Shuffle.java:338)
>         at 
> org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.call(Shuffle.java:319)
>         at java.util.concurrent.FutureTask.run(FutureTask.java:262)
>         at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>         at java.lang.Thread.run(Thread.java:745)
> Caused by: java.io.EOFException: Reach the limit of the buffer
>         at 
> org.apache.hadoop.io.BoundedByteArrayOutputStream.write(BoundedByteArrayOutputStream.java:92)
>         at 
> org.apache.tez.runtime.library.common.sort.impl.IFileOutputStream.write(IFileOutputStream.java:120)
>         at java.io.DataOutputStream.write(DataOutputStream.java:107)
>         at 
> org.apache.tez.runtime.library.common.sort.impl.IFile$Writer.writeKVPair(IFile.java:420)
>         at 
> org.apache.tez.runtime.library.common.sort.impl.IFile$Writer.append(IFile.java:390)
>         at 
> org.apache.tez.runtime.library.common.sort.impl.TezMerger.writeFile(TezMerger.java:203)
>         at 
> org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeManager$IntermediateMemoryToMemoryMerger.merge(MergeManager.java:527)
>         at 
> org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MergeThread.run(MergeThread.java:89)
> ]], Vertex failed as one or more tasks failed. failedTasks:1, Vertex 
> vertex_1417036912823_0071_1_01 [Reducer 2] killed/failed due to:null]
> Vertex killed, vertexName=Map 1, vertexId=vertex_1417036912823_0071_1_00, 
> diagnostics=[Vertex received Kill while in RUNNING state., Vertex killed as 
> other vertex failed. failedTasks:0, Vertex vertex_1417036912823_0071_1_00 
> [Map 1] killed/failed due to:null]
> DAG failed due to vertex failure. failedVertices:1 killedVertices:1
> {quote}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to