[ 
https://issues.apache.org/jira/browse/YARN-7803?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Miklos Szegedi updated YARN-7803:
---------------------------------
    Description: 
I see this occasionally in Apache Jenkins runs, but pretty consistently 
with the official Jenkins Docker image jenkins/jenkins:lts.
{code}
[ERROR] 
testGracefulFailoverMultipleZKfcs(org.apache.hadoop.ha.TestZKFailoverController)
 Time elapsed: 70.35 s <<< ERROR! org.apache.hadoop.ha.ServiceFailedException: 
Unable to become active. Local node did not get an opportunity to do so from 
ZooKeeper, or the local node took too long to transition to active. at 
org.apache.hadoop.ha.ZKFailoverController.doGracefulFailover(ZKFailoverController.java:692)
 at 
org.apache.hadoop.ha.ZKFailoverController.access$400(ZKFailoverController.java:60)
 at 
org.apache.hadoop.ha.ZKFailoverController$3.run(ZKFailoverController.java:609) 
at 
org.apache.hadoop.ha.ZKFailoverController$3.run(ZKFailoverController.java:606) 
at java.security.AccessController.doPrivileged(Native Method) at 
javax.security.auth.Subject.doAs(Subject.java:422) at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1965)
 at 
org.apache.hadoop.ha.ZKFailoverController.gracefulFailoverToYou(ZKFailoverController.java:606)
 at org.apache.hadoop.ha.ZKFCRpcServer.gracefulFailover(ZKFCRpcServer.java:94) 
at 
org.apache.hadoop.ha.TestZKFailoverController.testGracefulFailoverMultipleZKfcs(TestZKFailoverController.java:586)
 at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) 
at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 at java.lang.reflect.Method.invoke(Method.java:498) at 
org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47)
 at 
org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
 at 
org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44)
 at 
org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
 at 
org.apache.zookeeper.JUnit4ZKTestRunner$LoggedInvokeMethod.evaluate(JUnit4ZKTestRunner.java:55)
 at 
org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) 
at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27) 
at org.junit.rules.TestWatchman$1.evaluate(TestWatchman.java:53) at 
org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74)
{code}

  was:[ERROR] 
testGracefulFailoverMultipleZKfcs(org.apache.hadoop.ha.TestZKFailoverController)
 Time elapsed: 70.35 s <<< ERROR! org.apache.hadoop.ha.ServiceFailedException: 
Unable to become active. Local node did not get an opportunity to do so from 
ZooKeeper, or the local node took too long to transition to active. at 
org.apache.hadoop.ha.ZKFailoverController.doGracefulFailover(ZKFailoverController.java:692)
 at 
org.apache.hadoop.ha.ZKFailoverController.access$400(ZKFailoverController.java:60)
 at 
org.apache.hadoop.ha.ZKFailoverController$3.run(ZKFailoverController.java:609) 
at 
org.apache.hadoop.ha.ZKFailoverController$3.run(ZKFailoverController.java:606) 
at java.security.AccessController.doPrivileged(Native Method) at 
javax.security.auth.Subject.doAs(Subject.java:422) at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1965)
 at 
org.apache.hadoop.ha.ZKFailoverController.gracefulFailoverToYou(ZKFailoverController.java:606)
 at org.apache.hadoop.ha.ZKFCRpcServer.gracefulFailover(ZKFCRpcServer.java:94) 
at 
org.apache.hadoop.ha.TestZKFailoverController.testGracefulFailoverMultipleZKfcs(TestZKFailoverController.java:586)
 at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) 
at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 at java.lang.reflect.Method.invoke(Method.java:498) at 
org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47)
 at 
org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
 at 
org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44)
 at 
org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
 at 
org.apache.zookeeper.JUnit4ZKTestRunner$LoggedInvokeMethod.evaluate(JUnit4ZKTestRunner.java:55)
 at 
org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) 
at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27) 
at org.junit.rules.TestWatchman$1.evaluate(TestWatchman.java:53) at 
org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74)


> TestZKFailoverController occasionally fails in trunk
> ----------------------------------------------------
>
>                 Key: YARN-7803
>                 URL: https://issues.apache.org/jira/browse/YARN-7803
>             Project: Hadoop YARN
>          Issue Type: Bug
>    Affects Versions: 3.1.0
>            Reporter: Miklos Szegedi
>            Priority: Major
>
> I see this occasionally in Apache Jenkins runs, but pretty consistently 
> with the official Jenkins Docker image jenkins/jenkins:lts.
> {code}
> [ERROR] 
> testGracefulFailoverMultipleZKfcs(org.apache.hadoop.ha.TestZKFailoverController)
>  Time elapsed: 70.35 s <<< ERROR! 
> org.apache.hadoop.ha.ServiceFailedException: Unable to become active. Local 
> node did not get an opportunity to do so from ZooKeeper, or the local node 
> took too long to transition to active. at 
> org.apache.hadoop.ha.ZKFailoverController.doGracefulFailover(ZKFailoverController.java:692)
>  at 
> org.apache.hadoop.ha.ZKFailoverController.access$400(ZKFailoverController.java:60)
>  at 
> org.apache.hadoop.ha.ZKFailoverController$3.run(ZKFailoverController.java:609)
>  at 
> org.apache.hadoop.ha.ZKFailoverController$3.run(ZKFailoverController.java:606)
>  at java.security.AccessController.doPrivileged(Native Method) at 
> javax.security.auth.Subject.doAs(Subject.java:422) at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1965)
>  at 
> org.apache.hadoop.ha.ZKFailoverController.gracefulFailoverToYou(ZKFailoverController.java:606)
>  at 
> org.apache.hadoop.ha.ZKFCRpcServer.gracefulFailover(ZKFCRpcServer.java:94) at 
> org.apache.hadoop.ha.TestZKFailoverController.testGracefulFailoverMultipleZKfcs(TestZKFailoverController.java:586)
>  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at 
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) 
> at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>  at java.lang.reflect.Method.invoke(Method.java:498) at 
> org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47)
>  at 
> org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
>  at 
> org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44)
>  at 
> org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
>  at 
> org.apache.zookeeper.JUnit4ZKTestRunner$LoggedInvokeMethod.evaluate(JUnit4ZKTestRunner.java:55)
>  at 
> org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) 
> at 
> org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27) 
> at org.junit.rules.TestWatchman$1.evaluate(TestWatchman.java:53) at 
> org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74)
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to