[
https://issues.apache.org/jira/browse/YARN-9183?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Jason Lowe updated YARN-9183:
-
Priority: Blocker (was: Major)
I think this is much worse than just a failed unit test. A simple MapReduce
sleep job no longer succeeds: each map task fails with what looks like
the same error:
{noformat}
2019-01-09 22:59:36,423 WARN [main] org.apache.hadoop.mapred.YarnChild:
Exception running child : org.apache.hadoop.metrics2.MetricsException: Metrics
source RpcDetailedActivityForPort-1 already exists!
at
org.apache.hadoop.metrics2.lib.DefaultMetricsSystem.newSourceName(DefaultMetricsSystem.java:152)
at
org.apache.hadoop.metrics2.lib.DefaultMetricsSystem.sourceName(DefaultMetricsSystem.java:125)
at
org.apache.hadoop.metrics2.impl.MetricsSystemImpl.register(MetricsSystemImpl.java:229)
at
org.apache.hadoop.ipc.metrics.RpcDetailedMetrics.create(RpcDetailedMetrics.java:55)
at org.apache.hadoop.ipc.Client.<init>(Client.java:1341)
at org.apache.hadoop.ipc.ClientCache.getClient(ClientCache.java:57)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.<init>(ProtobufRpcEngine.java:149)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.<init>(ProtobufRpcEngine.java:136)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.<init>(ProtobufRpcEngine.java:120)
at
org.apache.hadoop.ipc.ProtobufRpcEngine.getProxy(ProtobufRpcEngine.java:102)
at org.apache.hadoop.ipc.RPC.getProtocolProxy(RPC.java:624)
at
org.apache.hadoop.hdfs.NameNodeProxiesClient.createProxyWithAlignmentContext(NameNodeProxiesClient.java:370)
at
org.apache.hadoop.hdfs.NameNodeProxiesClient.createNonHAProxyWithClientProtocol(NameNodeProxiesClient.java:348)
at
org.apache.hadoop.hdfs.server.namenode.ha.ClientHAProxyFactory.createProxy(ClientHAProxyFactory.java:46)
at
org.apache.hadoop.hdfs.server.namenode.ha.AbstractNNFailoverProxyProvider.createProxyIfNeeded(AbstractNNFailoverProxyProvider.java:152)
at
org.apache.hadoop.hdfs.server.namenode.ha.IPFailoverProxyProvider.getProxy(IPFailoverProxyProvider.java:57)
at
org.apache.hadoop.hdfs.server.namenode.ha.IPFailoverProxyProvider.getProxy(IPFailoverProxyProvider.java:44)
at
org.apache.hadoop.io.retry.RetryInvocationHandler$ProxyDescriptor.<init>(RetryInvocationHandler.java:197)
at
org.apache.hadoop.io.retry.RetryInvocationHandler.<init>(RetryInvocationHandler.java:328)
at
org.apache.hadoop.io.retry.RetryInvocationHandler.<init>(RetryInvocationHandler.java:322)
at org.apache.hadoop.io.retry.RetryProxy.create(RetryProxy.java:59)
at
org.apache.hadoop.hdfs.NameNodeProxiesClient.createHAProxy(NameNodeProxiesClient.java:326)
at
org.apache.hadoop.hdfs.NameNodeProxiesClient.createProxyWithClientProtocol(NameNodeProxiesClient.java:144)
at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:356)
at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:290)
at
org.apache.hadoop.hdfs.DistributedFileSystem.initialize(DistributedFileSystem.java:176)
at
org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3312)
at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:124)
at
org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3361)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:3329)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:479)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:227)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:177)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1876)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:172)
2019-01-09 22:59:36,431 INFO [main] org.apache.hadoop.mapred.Task: Running
cleanup for the task
2019-01-09 22:59:36,432 INFO [main] org.apache.hadoop.mapred.YarnChild:
Exception cleaning up: java.lang.NullPointerException
at org.apache.hadoop.mapred.Task.taskCleanup(Task.java:1458)
at org.apache.hadoop.mapred.YarnChild$3.run(YarnChild.java:200)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1876)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:197)
2019-01-09 22:59:36,544 INFO [main]
org.apache.hadoop.metrics2.impl.MetricsSystemImpl: Stopping MapTask metrics
system...
2019-01-09 22:59:36,544 INFO [main]
org.apache.hadoop.metrics2.impl.MetricsSystemImpl: MapTask metrics system
stopped.
2019-01-09 22:59:36,545 INFO [main]
org.apache.hadoop.metrics2.impl.MetricsSystemImpl: MapTask