[ https://issues.apache.org/jira/browse/YARN-3178?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14317034#comment-14317034 ]
Arpit Gupta commented on YARN-3178: ----------------------------------- Here is an example stack trace {code} 14/02/25 17:40:23 WARN retry.RetryInvocationHandler: Exception while invoking class org.apache.hadoop.yarn.api.impl.pb.client.ApplicationClientProtocolPBClientImpl.getApplicationReport. Not retrying because the invoked method is not idempotent, and unable to determine whether it was invoked java.io.IOException: Failed on local exception: java.io.EOFException; Host Details : local host is: "host/ip"; destination host is: "host":8032; at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:764) at org.apache.hadoop.ipc.Client.call(Client.java:1410) at org.apache.hadoop.ipc.Client.call(Client.java:1359) at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:206) at com.sun.proxy.$Proxy11.getApplicationReport(Unknown Source) at org.apache.hadoop.yarn.api.impl.pb.client.ApplicationClientProtocolPBClientImpl.getApplicationReport(ApplicationClientProtocolPBClientImpl.java:142) at sun.reflect.GeneratedMethodAccessor3.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:601) at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:186) at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102) at com.sun.proxy.$Proxy12.getApplicationReport(Unknown Source) at org.apache.hadoop.yarn.client.api.impl.YarnClientImpl.getApplicationReport(YarnClientImpl.java:268) at org.apache.hadoop.mapred.ResourceMgrDelegate.getApplicationReport(ResourceMgrDelegate.java:294) at org.apache.hadoop.mapred.ClientServiceDelegate.getProxy(ClientServiceDelegate.java:152) at org.apache.hadoop.mapred.ClientServiceDelegate.invoke(ClientServiceDelegate.java:319) at org.apache.hadoop.mapred.ClientServiceDelegate.getJobStatus(ClientServiceDelegate.java:419) at org.apache.hadoop.mapred.YARNRunner.getJobStatus(YARNRunner.java:531) at org.apache.hadoop.mapreduce.Job$1.run(Job.java:314) at org.apache.hadoop.mapreduce.Job$1.run(Job.java:311) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548) at org.apache.hadoop.mapreduce.Job.updateStatus(Job.java:311) at org.apache.hadoop.mapreduce.Job.isComplete(Job.java:599) at org.apache.hadoop.mapreduce.Job.monitorAndPrintJob(Job.java:1344) at org.apache.hadoop.mapred.JobClient$NetworkedJob.monitorAndPrintJob(JobClient.java:407) at org.apache.hadoop.mapred.JobClient.monitorAndPrintJob(JobClient.java:855) at org.apache.hadoop.streaming.StreamJob.submitAndMonitorJob(StreamJob.java:1018) at org.apache.hadoop.streaming.StreamJob.run(StreamJob.java:135) at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70) at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:84) at org.apache.hadoop.streaming.HadoopStreaming.main(HadoopStreaming.java:50) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:601) at org.apache.hadoop.util.RunJar.main(RunJar.java:212) Caused by: java.io.EOFException at java.io.DataInputStream.readInt(DataInputStream.java:392) at org.apache.hadoop.ipc.Client$Connection.receiveRpcResponse(Client.java:1050) at org.apache.hadoop.ipc.Client$Connection.run(Client.java:945) 14/02/25 17:40:23 INFO client.ConfiguredRMFailoverProxyProvider: Failing over to rm2 14/02/25 17:40:23 INFO mapreduce.Job: map 14% reduce 0% 14/02/25 17:40:24 WARN retry.RetryInvocationHandler: Exception while invoking class org.apache.hadoop.yarn.api.impl.pb.client.ApplicationClientProtocolPBClientImpl.getApplicationReport. Not retrying because the invoked method is not idempotent, and unable to determine whether it was invoked java.io.IOException: Failed on local exception: java.io.EOFException; Host Details : local host is: "host/ip"; destination host is: "host":8032; at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:764) at org.apache.hadoop.ipc.Client.call(Client.java:1410) at org.apache.hadoop.ipc.Client.call(Client.java:1359) at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:206) at com.sun.proxy.$Proxy11.getApplicationReport(Unknown Source) at org.apache.hadoop.yarn.api.impl.pb.client.ApplicationClientProtocolPBClientImpl.getApplicationReport(ApplicationClientProtocolPBClientImpl.java:142) at sun.reflect.GeneratedMethodAccessor3.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:601) at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:186) at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102) at com.sun.proxy.$Proxy12.getApplicationReport(Unknown Source) at org.apache.hadoop.yarn.client.api.impl.YarnClientImpl.getApplicationReport(YarnClientImpl.java:268) at org.apache.hadoop.mapred.ResourceMgrDelegate.getApplicationReport(ResourceMgrDelegate.java:294) at org.apache.hadoop.mapred.ClientServiceDelegate.getProxy(ClientServiceDelegate.java:152) at org.apache.hadoop.mapred.ClientServiceDelegate.invoke(ClientServiceDelegate.java:319) at org.apache.hadoop.mapred.ClientServiceDelegate.getJobStatus(ClientServiceDelegate.java:419) at org.apache.hadoop.mapred.YARNRunner.getJobStatus(YARNRunner.java:531) at org.apache.hadoop.mapreduce.Job$1.run(Job.java:314) at org.apache.hadoop.mapreduce.Job$1.run(Job.java:311) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548) at org.apache.hadoop.mapreduce.Job.updateStatus(Job.java:311) at org.apache.hadoop.mapreduce.Job.isComplete(Job.java:599) at org.apache.hadoop.mapreduce.Job.monitorAndPrintJob(Job.java:1344) at org.apache.hadoop.mapred.JobClient$NetworkedJob.monitorAndPrintJob(JobClient.java:407) at org.apache.hadoop.mapred.JobClient.monitorAndPrintJob(JobClient.java:855) at org.apache.hadoop.streaming.StreamJob.submitAndMonitorJob(StreamJob.java:1018) at org.apache.hadoop.streaming.StreamJob.run(StreamJob.java:135) at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70) at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:84) at org.apache.hadoop.streaming.HadoopStreaming.main(HadoopStreaming.java:50) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:601) at org.apache.hadoop.util.RunJar.main(RunJar.java:212) Caused by: java.io.EOFException at java.io.DataInputStream.readInt(DataInputStream.java:392) at org.apache.hadoop.ipc.Client$Connection.receiveRpcResponse(Client.java:1050) at org.apache.hadoop.ipc.Client$Connection.run(Client.java:945) 14/02/25 17:40:24 INFO client.ConfiguredRMFailoverProxyProvider: Failing over to rm2 14/02/25 17:40:24 WARN retry.RetryInvocationHandler: Exception while invoking class org.apache.hadoop.yarn.api.impl.pb.client.ApplicationClientProtocolPBClientImpl.getApplicationReport. Not retrying because the invoked method is not idempotent, and unable to determine whether it was invoked java.io.IOException: Failed on local exception: java.io.EOFException; Host Details : local host is: "host/ip"; destination host is: "host":8032; at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:764) at org.apache.hadoop.ipc.Client.call(Client.java:1410) at org.apache.hadoop.ipc.Client.call(Client.java:1359) at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:206) at com.sun.proxy.$Proxy11.getApplicationReport(Unknown Source) at org.apache.hadoop.yarn.api.impl.pb.client.ApplicationClientProtocolPBClientImpl.getApplicationReport(ApplicationClientProtocolPBClientImpl.java:142) at sun.reflect.GeneratedMethodAccessor3.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:601) at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:186) at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102) at com.sun.proxy.$Proxy12.getApplicationReport(Unknown Source) at org.apache.hadoop.yarn.client.api.impl.YarnClientImpl.getApplicationReport(YarnClientImpl.java:268) at org.apache.hadoop.mapred.ResourceMgrDelegate.getApplicationReport(ResourceMgrDelegate.java:294) at org.apache.hadoop.mapred.ClientServiceDelegate.getProxy(ClientServiceDelegate.java:152) at org.apache.hadoop.mapred.ClientServiceDelegate.invoke(ClientServiceDelegate.java:319) at org.apache.hadoop.mapred.ClientServiceDelegate.getJobStatus(ClientServiceDelegate.java:419) at org.apache.hadoop.mapred.YARNRunner.getJobStatus(YARNRunner.java:531) at org.apache.hadoop.mapreduce.Job$1.run(Job.java:314) at org.apache.hadoop.mapreduce.Job$1.run(Job.java:311) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548) at org.apache.hadoop.mapreduce.Job.updateStatus(Job.java:311) at org.apache.hadoop.mapreduce.Job.isComplete(Job.java:599) at org.apache.hadoop.mapreduce.Job.monitorAndPrintJob(Job.java:1344) at org.apache.hadoop.mapred.JobClient$NetworkedJob.monitorAndPrintJob(JobClient.java:407) at org.apache.hadoop.mapred.JobClient.monitorAndPrintJob(JobClient.java:855) at org.apache.hadoop.streaming.StreamJob.submitAndMonitorJob(StreamJob.java:1018) at org.apache.hadoop.streaming.StreamJob.run(StreamJob.java:135) at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70) at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:84) at org.apache.hadoop.streaming.HadoopStreaming.main(HadoopStreaming.java:50) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:601) at org.apache.hadoop.util.RunJar.main(RunJar.java:212) Caused by: java.io.EOFException at java.io.DataInputStream.readInt(DataInputStream.java:392) at org.apache.hadoop.ipc.Client$Connection.receiveRpcResponse(Client.java:1050) at org.apache.hadoop.ipc.Client$Connection.run(Client.java:945) 14/02/25 17:40:24 WARN retry.RetryInvocationHandler: Exception while invoking class org.apache.hadoop.yarn.api.impl.pb.client.ApplicationClientProtocolPBClientImpl.getApplicationReport. Not retrying because the invoked method is not idempotent, and unable to determine whether it was invoked java.io.IOException: Failed on local exception: java.io.EOFException; Host Details : local host is: "host/ip"; destination host is: "host":8032; at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:764) at org.apache.hadoop.ipc.Client.call(Client.java:1410) at org.apache.hadoop.ipc.Client.call(Client.java:1359) at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:206) at com.sun.proxy.$Proxy11.getApplicationReport(Unknown Source) at org.apache.hadoop.yarn.api.impl.pb.client.ApplicationClientProtocolPBClientImpl.getApplicationReport(ApplicationClientProtocolPBClientImpl.java:142) at sun.reflect.GeneratedMethodAccessor3.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:601) at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:186) at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102) at com.sun.proxy.$Proxy12.getApplicationReport(Unknown Source) at org.apache.hadoop.yarn.client.api.impl.YarnClientImpl.getApplicationReport(YarnClientImpl.java:268) at org.apache.hadoop.mapred.ResourceMgrDelegate.getApplicationReport(ResourceMgrDelegate.java:294) at org.apache.hadoop.mapred.ClientServiceDelegate.getProxy(ClientServiceDelegate.java:152) at org.apache.hadoop.mapred.ClientServiceDelegate.invoke(ClientServiceDelegate.java:319) at org.apache.hadoop.mapred.ClientServiceDelegate.getJobStatus(ClientServiceDelegate.java:419) at org.apache.hadoop.mapred.YARNRunner.getJobStatus(YARNRunner.java:531) at org.apache.hadoop.mapreduce.Job$1.run(Job.java:314) at org.apache.hadoop.mapreduce.Job$1.run(Job.java:311) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548) at org.apache.hadoop.mapreduce.Job.updateStatus(Job.java:311) at org.apache.hadoop.mapreduce.Job.isComplete(Job.java:599) at org.apache.hadoop.mapreduce.Job.monitorAndPrintJob(Job.java:1344) at org.apache.hadoop.mapred.JobClient$NetworkedJob.monitorAndPrintJob(JobClient.java:407) at org.apache.hadoop.mapred.JobClient.monitorAndPrintJob(JobClient.java:855) at org.apache.hadoop.streaming.StreamJob.submitAndMonitorJob(StreamJob.java:1018) at org.apache.hadoop.streaming.StreamJob.run(StreamJob.java:135) at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70) at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:84) at org.apache.hadoop.streaming.HadoopStreaming.main(HadoopStreaming.java:50) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:601) at org.apache.hadoop.util.RunJar.main(RunJar.java:212) Caused by: java.io.EOFException at java.io.DataInputStream.readInt(DataInputStream.java:392) at org.apache.hadoop.ipc.Client$Connection.receiveRpcResponse(Client.java:1050) at org.apache.hadoop.ipc.Client$Connection.run(Client.java:945) 14/02/25 17:40:24 WARN retry.RetryInvocationHandler: Exception while invoking class org.apache.hadoop.yarn.api.impl.pb.client.ApplicationClientProtocolPBClientImpl.getApplicationReport. Not retrying because the invoked method is not idempotent, and unable to determine whether it was invoked java.io.IOException: Failed on local exception: java.io.EOFException; Host Details : local host is: "host/ip"; destination host is: "host":8032; at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:764) at org.apache.hadoop.ipc.Client.call(Client.java:1410) at org.apache.hadoop.ipc.Client.call(Client.java:1359) at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:206) at com.sun.proxy.$Proxy11.getApplicationReport(Unknown Source) at org.apache.hadoop.yarn.api.impl.pb.client.ApplicationClientProtocolPBClientImpl.getApplicationReport(ApplicationClientProtocolPBClientImpl.java:142) at sun.reflect.GeneratedMethodAccessor3.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:601) at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:186) at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102) at com.sun.proxy.$Proxy12.getApplicationReport(Unknown Source) at org.apache.hadoop.yarn.client.api.impl.YarnClientImpl.getApplicationReport(YarnClientImpl.java:268) at org.apache.hadoop.mapred.ResourceMgrDelegate.getApplicationReport(ResourceMgrDelegate.java:294) at org.apache.hadoop.mapred.ClientServiceDelegate.getProxy(ClientServiceDelegate.java:152) at org.apache.hadoop.mapred.ClientServiceDelegate.invoke(ClientServiceDelegate.java:319) at org.apache.hadoop.mapred.ClientServiceDelegate.getJobStatus(ClientServiceDelegate.java:419) at org.apache.hadoop.mapred.YARNRunner.getJobStatus(YARNRunner.java:531) at org.apache.hadoop.mapreduce.Job$1.run(Job.java:314) at org.apache.hadoop.mapreduce.Job$1.run(Job.java:311) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548) at org.apache.hadoop.mapreduce.Job.updateStatus(Job.java:311) at org.apache.hadoop.mapreduce.Job.isComplete(Job.java:599) at org.apache.hadoop.mapreduce.Job.monitorAndPrintJob(Job.java:1344) at org.apache.hadoop.mapred.JobClient$NetworkedJob.monitorAndPrintJob(JobClient.java:407) at org.apache.hadoop.mapred.JobClient.monitorAndPrintJob(JobClient.java:855) at org.apache.hadoop.streaming.StreamJob.submitAndMonitorJob(StreamJob.java:1018) at org.apache.hadoop.streaming.StreamJob.run(StreamJob.java:135) at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70) at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:84) at org.apache.hadoop.streaming.HadoopStreaming.main(HadoopStreaming.java:50) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:601) at org.apache.hadoop.util.RunJar.main(RunJar.java:212) Caused by: java.io.EOFException at java.io.DataInputStream.readInt(DataInputStream.java:392) at org.apache.hadoop.ipc.Client$Connection.receiveRpcResponse(Client.java:1050) at org.apache.hadoop.ipc.Client$Connection.run(Client.java:945) {code} > Clean up stack trace on the client when RM fails over > ----------------------------------------------------- > > Key: YARN-3178 > URL: https://issues.apache.org/jira/browse/YARN-3178 > Project: Hadoop YARN > Issue Type: Improvement > Components: client > Reporter: Arpit Gupta > Priority: Minor > > When the client fails over it spits out a stack trace. It will be good to > clean that up. -- This message was sent by Atlassian JIRA (v6.3.4#6332)