[ https://issues.apache.org/jira/browse/YARN-1689?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13894585#comment-13894585 ]
Hudson commented on YARN-1689: ------------------------------ SUCCESS: Integrated in Hadoop-Mapreduce-trunk #1691 (See [https://builds.apache.org/job/Hadoop-Mapreduce-trunk/1691/]) YARN-1689. Made RMAppAttempt get killed when RMApp is at ACCEPTED. Contributed by Vinod Kumar Vavilapalli. (zjshen: http://svn.apache.org/viewcvs.cgi/?root=Apache-SVN&view=rev&rev=1565497) * /hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt * /hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java * /hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java * /hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java * /hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java * /hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java * /hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java > RMAppAttempt is not killed when RMApp is at ACCEPTED > ---------------------------------------------------- > > Key: YARN-1689 > URL: https://issues.apache.org/jira/browse/YARN-1689 > Project: Hadoop YARN > Issue Type: Bug > Components: resourcemanager > Affects Versions: 2.3.0 > Reporter: Deepesh Khandelwal > Assignee: Vinod Kumar Vavilapalli > Priority: Critical > Fix For: 2.3.0 > > Attachments: RM_UI.png, YARN-1689-20140205.txt > > > When running some Hive on Tez jobs, the RM after a while gets into an > unusable state where no jobs run. In the RM log I see the following exception: > {code} > 2014-02-04 20:28:08,553 WARN ipc.Server (Server.java:run(1978)) - IPC Server > handler 0 on 8030, call > org.apache.hadoop.yarn.api.ApplicationMasterProtocolPB.registerApplicationMaster > from 172.18.145.156:40474 Call#0 Retry#0: error: > java.lang.NullPointerException > java.lang.NullPointerException > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler.getTransferredContainers(AbstractYarnScheduler.java:48) > at > org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService.registerApplicationMaster(ApplicationMasterService.java:278) > at > org.apache.hadoop.yarn.api.impl.pb.service.ApplicationMasterProtocolPBServiceImpl.registerApplicationMaster(ApplicationMasterProtocolPBServiceImpl.java:90) > at > org.apache.hadoop.yarn.proto.ApplicationMasterProtocol$ApplicationMasterProtocolService$2.callBlockingMethod(ApplicationMasterProtocol.java:95) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:585) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:928) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1962) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1958) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:396) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1956) > ...... > 2014-02-04 20:28:08,544 ERROR rmapp.RMAppImpl (RMAppImpl.java:handle(626)) - > Can't handle this event at current state > org.apache.hadoop.yarn.state.InvalidStateTransitonException: Invalid event: > ATTEMPT_REGISTERED at KILLED > at > org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:305) > at > org.apache.hadoop.yarn.state.StateMachineFactory.access$300(StateMachineFactory.java:46) > at > org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:448) > at > org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl.handle(RMAppImpl.java:624) > at > org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl.handle(RMAppImpl.java:81) > at > org.apache.hadoop.yarn.server.resourcemanager.ResourceManager$ApplicationEventDispatcher.handle(ResourceManager.java:656) > at > org.apache.hadoop.yarn.server.resourcemanager.ResourceManager$ApplicationEventDispatcher.handle(ResourceManager.java:640) > at > org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:173) > at > org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:106) > at java.lang.Thread.run(Thread.java:662) > 2014-02-04 20:28:08,549 INFO resourcemanager.RMAuditLogger > (RMAuditLogger.java:logSuccess(140)) - USER=hrt_qa IP=172.18.145.156 > OPERATION=Kill Application Request TARGET=ClientRMService > RESULT=SUCCESS APPID=application_1391543307203_0001 > 2014-02-04 20:28:08,553 WARN ipc.Server (Server.java:run(1978)) - IPC Server > handler 0 on 8030, call > org.apache.hadoop.yarn.api.ApplicationMasterProtocolPB.registerApplicationMaster > from 172.18.145.156:40474 Call#0 Retry#0: error: > java.lang.NullPointerException > java.lang.NullPointerException > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler.getTransferredContainers(AbstractYarnScheduler.java:48) > at > org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService.registerApplicationMaster(ApplicationMasterService.java:278) > at > org.apache.hadoop.yarn.api.impl.pb.service.ApplicationMasterProtocolPBServiceImpl.registerApplicationMaster(ApplicationMasterProtocolPBServiceImpl.java:90) > at > org.apache.hadoop.yarn.proto.ApplicationMasterProtocol$ApplicationMasterProtocolService$2.callBlockingMethod(ApplicationMasterProtocol.java:95) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:585) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:928) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1962) > at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1958) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:396) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1956) > {code} -- This message was sent by Atlassian JIRA (v6.1.5#6160)