[ 
https://issues.apache.org/jira/browse/TEZ-1547?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14187985#comment-14187985
 ] 

Rajesh Balamohan commented on TEZ-1547:
---------------------------------------

Tested Tez (commit d59b2318dd66ee1784dabca28820e9b8e65f8bf1) with the TEZ-1547.5 patch applied.

It works fine with smaller jobs. With large jobs (a large number of tasks with 
very short runtimes), the DAGAppMaster hangs: its dispatcher thread is blocked 
waiting to acquire a write lock. Pasting the thread dump here for reference.

{code}
Thread 25715: (state = BLOCKED)
 - sun.misc.Unsafe.park(boolean, long) @bci=0 (Compiled frame; information may 
be imprecise)
 - java.util.concurrent.locks.LockSupport.park(java.lang.Object) @bci=14, 
line=175 (Compiled frame)
 - 
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt() 
@bci=1, line=836 (Compiled frame)
 - 
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(java.util.concurrent.locks.AbstractQueuedSynchronizer$Node,
 int) @bci=67, line=870 (Compiled frame)
 - java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(int) @bci=17, 
line=1199 (Compiled frame)
 - java.util.concurrent.locks.ReentrantReadWriteLock$WriteLock.lock() @bci=5, 
line=943 (Compiled frame)
 - 
org.apache.tez.dag.app.dag.StateChangeNotifier.unregisterForVertexUpdates(java.lang.String,
 org.apache.tez.dag.app.dag.VertexStateUpdateListener) @bci=10, line=92 
(Interpreted frame)
 - 
org.apache.tez.dag.app.dag.impl.VertexManager$VertexManagerPluginContextImpl.unregisterForVertexStatusUpdates()
 @bci=45, line=254 (Interpreted frame)
 - 
org.apache.tez.dag.app.dag.impl.VertexManager$VertexManagerPluginContextImpl.vertexManagerDone()
 @bci=51, line=269 (Interpreted frame)
 - 
org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager.schedulePendingTasks(int)
 @bci=131, line=540 (Interpreted frame)
 - 
org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager.schedulePendingTasks()
 @bci=365, line=627 (Compiled frame)
 - 
org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager.onVertexStateUpdated(org.apache.tez.dag.api.event.VertexStateUpdate)
 @bci=208, line=710 (Interpreted frame)
 - 
org.apache.tez.dag.app.dag.impl.VertexManager$VertexManagerPluginContextImpl.onStateUpdated(org.apache.tez.dag.api.event.VertexStateUpdate)
 @bci=96, line=293 (Interpreted frame)
 - 
org.apache.tez.dag.app.dag.StateChangeNotifier$ListenerContainer.sendStateUpdate(org.apache.tez.dag.api.event.VertexStateUpdate)
 @bci=21, line=138 (Interpreted frame)
 - 
org.apache.tez.dag.app.dag.StateChangeNotifier$ListenerContainer.access$100(org.apache.tez.dag.app.dag.StateChangeNotifier$ListenerContainer,
 org.apache.tez.dag.api.event.VertexStateUpdate) @bci=2, line=122 (Interpreted 
frame)
 - 
org.apache.tez.dag.app.dag.StateChangeNotifier.sendStateUpdate(org.apache.tez.dag.records.TezVertexID,
 org.apache.tez.dag.api.event.VertexStateUpdate) @bci=39, line=116 (Interpreted 
frame)
 - 
org.apache.tez.dag.app.dag.StateChangeNotifier.stateChanged(org.apache.tez.dag.records.TezVertexID,
 org.apache.tez.dag.api.event.VertexStateUpdate) @bci=35, line=106 (Interpreted 
frame)
 - org.apache.tez.dag.app.dag.impl.VertexImpl.doneReconfiguringVertex() 
@bci=55, line=1467 (Interpreted frame)
 - 
org.apache.tez.dag.app.dag.impl.VertexManager$VertexManagerPluginContextImpl.doneReconfiguringVertex()
 @bci=11, line=281 (Interpreted frame)
 - 
org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager.schedulePendingTasks(int)
 @bci=34, line=529 (Interpreted frame)
 - 
org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager.schedulePendingTasks()
 @bci=142, line=584 (Compiled frame)
 - 
org.apache.tez.dag.library.vertexmanager.ShuffleVertexManager.onSourceTaskCompleted(java.lang.String,
 java.lang.Integer) @bci=74, line=365 (Interpreted frame)
 - 
org.apache.tez.dag.app.dag.impl.VertexManager.onSourceTaskCompleted(org.apache.tez.dag.records.TezTaskID)
 @bci=52, line=364 (Interpreted frame)
 - 
org.apache.tez.dag.app.dag.impl.VertexImpl$SourceTaskAttemptCompletedEventTransition.transition(org.apache.tez.dag.app.dag.impl.VertexImpl,
 org.apache.tez.dag.app.dag.event.VertexEvent) @bci=118, line=3364 (Interpreted 
frame)
 - 
org.apache.tez.dag.app.dag.impl.VertexImpl$SourceTaskAttemptCompletedEventTransition.transition(java.lang.Object,
 java.lang.Object) @bci=9, line=3345 (Interpreted frame)
 - 
org.apache.hadoop.yarn.state.StateMachineFactory$MultipleInternalArc.doTransition(java.lang.Object,
 java.lang.Enum, java.lang.Object, java.lang.Enum) @bci=6, line=385 (Compiled 
frame)
 - 
org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(java.lang.Object, 
java.lang.Enum, java.lang.Enum, java.lang.Object) @bci=45, line=302 (Compiled 
frame)
 - 
org.apache.hadoop.yarn.state.StateMachineFactory.access$300(org.apache.hadoop.yarn.state.StateMachineFactory,
 java.lang.Object, java.lang.Enum, java.lang.Enum, java.lang.Object) @bci=6, 
line=46 (Compiled frame)
 - 
org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(java.lang.Enum,
 java.lang.Object) @bci=15, line=448 (Compiled frame)
 - org.apache.tez.state.StateMachineTez.doTransition(java.lang.Enum, 
java.lang.Object) @bci=16, line=57 (Compiled frame)
 - 
org.apache.tez.dag.app.dag.impl.VertexImpl.handle(org.apache.tez.dag.app.dag.event.VertexEvent)
 @bci=101, line=1489 (Compiled frame)
 - 
org.apache.tez.dag.app.dag.impl.VertexImpl.handle(org.apache.hadoop.yarn.event.Event)
 @bci=5, line=175 (Compiled frame)
 - 
org.apache.tez.dag.app.DAGAppMaster$VertexEventDispatcher.handle(org.apache.tez.dag.app.dag.event.VertexEvent)
 @bci=60, line=1734 (Compiled frame)
 - 
org.apache.tez.dag.app.DAGAppMaster$VertexEventDispatcher.handle(org.apache.hadoop.yarn.event.Event)
 @bci=5, line=1720 (Compiled frame)
 - 
org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(org.apache.hadoop.yarn.event.Event)
 @bci=86, line=173 (Compiled frame)
 - org.apache.hadoop.yarn.event.AsyncDispatcher$1.run() @bci=140, line=106 
(Interpreted frame)
 - java.lang.Thread.run() @bci=11, line=745 (Interpreted frame)
{code}


> Make use of state change notifier in VertexManagerPlugins
> ---------------------------------------------------------
>
>                 Key: TEZ-1547
>                 URL: https://issues.apache.org/jira/browse/TEZ-1547
>             Project: Apache Tez
>          Issue Type: Improvement
>            Reporter: Siddharth Seth
>            Assignee: Bikas Saha
>         Attachments: TEZ-1547.1.patch, TEZ-1547.3.patch, TEZ-1547.4.patch, 
> TEZ-1547.5.patch
>
>
> Instead of the various APIs like onVertexStarted, simple notifications could 
> be sent.
> Some existing APIs could end up being deprecated.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to