[ https://issues.apache.org/jira/browse/YARN-11510?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Shilun Fan updated YARN-11510: ------------------------------ Parent: YARN-5597 Issue Type: Sub-task (was: Improvement) > [Federation] Fix NodeManager#TestFederationInterceptor Flaky Unit Test > ---------------------------------------------------------------------- > > Key: YARN-11510 > URL: https://issues.apache.org/jira/browse/YARN-11510 > Project: Hadoop YARN > Issue Type: Sub-task > Components: federation, nodemanager > Affects Versions: 3.4.0 > Reporter: Shilun Fan > Assignee: Shilun Fan > Priority: Major > Labels: pull-request-available > Fix For: 3.4.0 > > > I am performing overall integration testing for YARN Federation, and I found > that running {{NodeManager#TestFederationInterceptor}} sometimes results in > Flaky Task. This JIRA will fix this issue. > > {code:java} > 2023-06-10 16:38:39,932 ERROR [main] ipc.RPC (RPC.java:stopProxy(811)) - > RPC.stopProxy called on non proxy: > class=org.apache.hadoop.yarn.server.MockResourceManagerFacade > java.lang.IllegalArgumentException: not a proxy instance > at java.lang.reflect.Proxy.getInvocationHandler(Proxy.java:816) > at org.apache.hadoop.ipc.RPC.stopProxy(RPC.java:802) > at > org.apache.hadoop.yarn.server.AMRMClientRelayer.shutdown(AMRMClientRelayer.java:198) > at > org.apache.hadoop.yarn.server.nodemanager.amrmproxy.FederationInterceptor.shutdown(FederationInterceptor.java:883) > at > org.apache.hadoop.yarn.server.nodemanager.amrmproxy.TestFederationInterceptor.tearDown(TestFederationInterceptor.java:145) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56) > at > org.junit.internal.runners.statements.RunAfters.invokeMethod(RunAfters.java:46) > at > org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:33) > at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306) > at > org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100) > at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:366) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63) > at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331) > at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79) > at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329) > at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66) > at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293) > at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306) > at org.junit.runners.ParentRunner.run(ParentRunner.java:413) > at org.junit.runner.JUnitCore.run(JUnitCore.java:137) > at > com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:69) > at > com.intellij.rt.junit.IdeaTestRunner$Repeater$1.execute(IdeaTestRunner.java:38) > at > com.intellij.rt.execution.junit.TestsRepeater.repeat(TestsRepeater.java:11) > at > com.intellij.rt.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:35) > at > com.intellij.rt.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:235) > at com.intellij.rt.junit.JUnitStarter.main(JUnitStarter.java:54) {code} > testRecoverWithoutAMRMProxyHA > ``` > java.lang.AssertionError: > Expected :1 > Actual :0 > <Click to see difference> > at org.junit.Assert.fail(Assert.java:89) > at org.junit.Assert.failNotEquals(Assert.java:835) > at org.junit.Assert.assertEquals(Assert.java:647) > at org.junit.Assert.assertEquals(Assert.java:633) > at > org.apache.hadoop.yarn.server.nodemanager.amrmproxy.TestFederationInterceptor$3.run(TestFederationInterceptor.java:594) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1953) > at > org.apache.hadoop.yarn.server.nodemanager.amrmproxy.TestFederationInterceptor.testRecover(TestFederationInterceptor.java:534) > at > org.apache.hadoop.yarn.server.nodemanager.amrmproxy.TestFederationInterceptor.testRecoverWithoutAMRMProxyHA(TestFederationInterceptor.java:517) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) > at > org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27) > at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306) > at > org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100) > at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:366) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63) > at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331) > at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79) > at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329) > at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66) > at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293) > at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306) > at org.junit.runners.ParentRunner.run(ParentRunner.java:413) > at org.junit.runner.JUnitCore.run(JUnitCore.java:137) > at > com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:69) > at > com.intellij.rt.junit.IdeaTestRunner$Repeater$1.execute(IdeaTestRunner.java:38) > at > com.intellij.rt.execution.junit.TestsRepeater.repeat(TestsRepeater.java:11) > at > com.intellij.rt.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:35) > at > com.intellij.rt.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:235) > at com.intellij.rt.junit.JUnitStarter.main(JUnitStarter.java:54) > ``` > testSubClusterTimeOut > ``` > 2023-06-10 17:28:24,986 INFO [pool-23-thread-1] uam.UnmanagedAMPoolManager > (UnmanagedAMPoolManager.java:launchUAM(210)) - Launching UAM id SC-1 for > application application_123456_0001 > 2023-06-10 17:28:24,986 ERROR [main] amrmproxy.FederationInterceptor > (FederationInterceptor.java:allocate(780)) - Exception encountered while > processing heart beat for appattempt_123456_0001_000001 > org.apache.hadoop.yarn.exceptions.YarnException: AllocateAsync should not be > called before launchUAM > at > org.apache.hadoop.yarn.server.uam.UnmanagedApplicationManager.allocateAsync(UnmanagedApplicationManager.java:325) > at > org.apache.hadoop.yarn.server.uam.UnmanagedAMPoolManager.allocateAsync(UnmanagedAMPoolManager.java:325) > at > org.apache.hadoop.yarn.server.nodemanager.amrmproxy.FederationInterceptor.sendRequestsToResourceManagers(FederationInterceptor.java:1237) > at > org.apache.hadoop.yarn.server.nodemanager.amrmproxy.FederationInterceptor.allocate(FederationInterceptor.java:735) > at > org.apache.hadoop.yarn.server.nodemanager.amrmproxy.TestFederationInterceptor.getContainersAndAssert(TestFederationInterceptor.java:224) > at > org.apache.hadoop.yarn.server.nodemanager.amrmproxy.TestFederationInterceptor.access$400(TestFederationInterceptor.java:96) > at > org.apache.hadoop.yarn.server.nodemanager.amrmproxy.TestFederationInterceptor$4.run(TestFederationInterceptor.java:792) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1953) > at > org.apache.hadoop.yarn.server.nodemanager.amrmproxy.TestFederationInterceptor.testSubClusterTimeOut(TestFederationInterceptor.java:776) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) > at > org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27) > at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306) > at > org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100) > at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:366) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63) > at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331) > at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79) > at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329) > at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66) > at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293) > at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306) > at org.junit.runners.ParentRunner.run(ParentRunner.java:413) > at org.junit.runner.JUnitCore.run(JUnitCore.java:137) > at > com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:69) > at > com.intellij.rt.junit.IdeaTestRunner$Repeater$1.execute(IdeaTestRunner.java:38) > at > com.intellij.rt.execution.junit.TestsRepeater.repeat(TestsRepeater.java:11) > at > com.intellij.rt.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:35) > at > com.intellij.rt.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:235) > at com.intellij.rt.junit.JUnitStarter.main(JUnitStarter.java:54) > ``` -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: yarn-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: yarn-issues-h...@hadoop.apache.org