[ 
https://issues.apache.org/jira/browse/ZOOKEEPER-1678?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13616919#comment-13616919
 ] 

JL commented on ZOOKEEPER-1678:
-------------------------------

*Stack traces*

{code}
2013-03-28 22:27:01
Full thread dump Java HotSpot(TM) 64-Bit Server VM (20.7-b02 mixed mode):

"RecvWorker:2" prio=10 tid=0x00007fb5e0003000 nid=0x3f4e runnable 
[0x00007fb61c5d3000]
   java.lang.Thread.State: RUNNABLE
    at java.net.SocketInputStream.socketRead0(Native Method)
    at java.net.SocketInputStream.read(Unknown Source)
    at java.net.SocketInputStream.read(Unknown Source)
    at java.io.DataInputStream.readInt(Unknown Source)
    at 
org.apache.zookeeper.server.quorum.QuorumCnxManager$RecvWorker.run(QuorumCnxManager.java:747)

"SendWorker:2" prio=10 tid=0x00007fb5e0001800 nid=0x3f4d waiting on condition 
[0x00007fb61c6d4000]
   java.lang.Thread.State: TIMED_WAITING (parking)
    at sun.misc.Unsafe.park(Native Method)
    - parking to wait for  <0x000000008b800228> (a 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
    at java.util.concurrent.locks.LockSupport.parkNanos(Unknown Source)
    at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(Unknown
 Source)
    at java.util.concurrent.ArrayBlockingQueue.poll(Unknown Source)
    at 
org.apache.zookeeper.server.quorum.QuorumCnxManager.pollSendQueue(QuorumCnxManager.java:831)
    at 
org.apache.zookeeper.server.quorum.QuorumCnxManager.access$500(QuorumCnxManager.java:62)
    at 
org.apache.zookeeper.server.quorum.QuorumCnxManager$SendWorker.run(QuorumCnxManager.java:667)

"RecvWorker:5" prio=10 tid=0x00007fb5e000f800 nid=0x2378 runnable 
[0x00007fb5e7bfa000]
   java.lang.Thread.State: RUNNABLE
    at java.net.SocketInputStream.socketRead0(Native Method)
    at java.net.SocketInputStream.read(Unknown Source)
    at java.net.SocketInputStream.read(Unknown Source)
    at java.io.DataInputStream.readInt(Unknown Source)
    at 
org.apache.zookeeper.server.quorum.QuorumCnxManager$RecvWorker.run(QuorumCnxManager.java:747)

"SendWorker:5" prio=10 tid=0x00007fb5e000d800 nid=0x2377 waiting on condition 
[0x00007fb5e7cfb000]
   java.lang.Thread.State: TIMED_WAITING (parking)
    at sun.misc.Unsafe.park(Native Method)
    - parking to wait for  <0x000000008b800408> (a 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
    at java.util.concurrent.locks.LockSupport.parkNanos(Unknown Source)
    at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(Unknown
 Source)
    at java.util.concurrent.ArrayBlockingQueue.poll(Unknown Source)
    at 
org.apache.zookeeper.server.quorum.QuorumCnxManager.pollSendQueue(QuorumCnxManager.java:831)
    at 
org.apache.zookeeper.server.quorum.QuorumCnxManager.access$500(QuorumCnxManager.java:62)
    at 
org.apache.zookeeper.server.quorum.QuorumCnxManager$SendWorker.run(QuorumCnxManager.java:667)

"RecvWorker:4" prio=10 tid=0x00007fb5e000c000 nid=0x232a runnable 
[0x00007fb5e7dfc000]
   java.lang.Thread.State: RUNNABLE
    at java.net.SocketInputStream.socketRead0(Native Method)
    at java.net.SocketInputStream.read(Unknown Source)
    at java.net.SocketInputStream.read(Unknown Source)
    at java.io.DataInputStream.readInt(Unknown Source)
    at 
org.apache.zookeeper.server.quorum.QuorumCnxManager$RecvWorker.run(QuorumCnxManager.java:747)

"SendWorker:4" prio=10 tid=0x00007fb5e000a800 nid=0x2329 waiting on condition 
[0x00007fb5e7efd000]
   java.lang.Thread.State: TIMED_WAITING (parking)
    at sun.misc.Unsafe.park(Native Method)
    - parking to wait for  <0x000000008b810878> (a 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
    at java.util.concurrent.locks.LockSupport.parkNanos(Unknown Source)
    at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(Unknown
 Source)
    at java.util.concurrent.ArrayBlockingQueue.poll(Unknown Source)
    at 
org.apache.zookeeper.server.quorum.QuorumCnxManager.pollSendQueue(QuorumCnxManager.java:831)
    at 
org.apache.zookeeper.server.quorum.QuorumCnxManager.access$500(QuorumCnxManager.java:62)
    at 
org.apache.zookeeper.server.quorum.QuorumCnxManager$SendWorker.run(QuorumCnxManager.java:667)

"QuorumPeer[myid=1]/0:0:0:0:0:0:0:0:2181" prio=10 tid=0x00007fb620178000 
nid=0xecd waiting on condition [0x00007fb61c7d5000]
   java.lang.Thread.State: TIMED_WAITING (parking)
    at sun.misc.Unsafe.park(Native Method)
    - parking to wait for  <0x000000008b813010> (a 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
    at java.util.concurrent.locks.LockSupport.parkNanos(Unknown Source)
    at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(Unknown
 Source)
    at java.util.concurrent.LinkedBlockingQueue.poll(Unknown Source)
    at 
org.apache.zookeeper.server.quorum.FastLeaderElection.lookForLeader(FastLeaderElection.java:754)
    at org.apache.zookeeper.server.quorum.QuorumPeer.run(QuorumPeer.java:716)

"WorkerReceiver[myid=1]" daemon prio=10 tid=0x00007fb62017c000 nid=0xecc 
waiting on condition [0x00007fb61c8d6000]
   java.lang.Thread.State: TIMED_WAITING (parking)
    at sun.misc.Unsafe.park(Native Method)
    - parking to wait for  <0x000000008b863200> (a 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
    at java.util.concurrent.locks.LockSupport.parkNanos(Unknown Source)
    at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(Unknown
 Source)
    at java.util.concurrent.ArrayBlockingQueue.poll(Unknown Source)
    at 
org.apache.zookeeper.server.quorum.QuorumCnxManager.pollRecvQueue(QuorumCnxManager.java:883)
    at 
org.apache.zookeeper.server.quorum.FastLeaderElection$Messenger$WorkerReceiver.run(FastLeaderElection.java:205)
    at java.lang.Thread.run(Unknown Source)

"WorkerSender[myid=1]" daemon prio=10 tid=0x00007fb620180000 nid=0xecb waiting 
on condition [0x00007fb61c9d7000]
   java.lang.Thread.State: TIMED_WAITING (parking)
    at sun.misc.Unsafe.park(Native Method)
    - parking to wait for  <0x000000008b82a360> (a 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
    at java.util.concurrent.locks.LockSupport.parkNanos(Unknown Source)
    at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(Unknown
 Source)
    at java.util.concurrent.LinkedBlockingQueue.poll(Unknown Source)
    at 
org.apache.zookeeper.server.quorum.FastLeaderElection$Messenger$WorkerSender.run(FastLeaderElection.java:362)
    at java.lang.Thread.run(Unknown Source)

"10.51.21.144:3888" prio=10 tid=0x00007fb620185000 nid=0xeca runnable 
[0x00007fb61cad8000]
   java.lang.Thread.State: RUNNABLE
    at java.net.PlainSocketImpl.socketAccept(Native Method)
    at java.net.PlainSocketImpl.accept(Unknown Source)
    - locked <0x000000008b863660> (a java.net.SocksSocketImpl)
    at java.net.ServerSocket.implAccept(Unknown Source)
    at java.net.ServerSocket.accept(Unknown Source)
    at 
org.apache.zookeeper.server.quorum.QuorumCnxManager$Listener.run(QuorumCnxManager.java:491)

"NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181" daemon prio=10 
tid=0x00007fb62018c000 nid=0xec9 runnable [0x00007fb61cbd9000]
   java.lang.Thread.State: RUNNABLE
    at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
    at sun.nio.ch.EPollArrayWrapper.poll(Unknown Source)
    at sun.nio.ch.EPollSelectorImpl.doSelect(Unknown Source)
    at sun.nio.ch.SelectorImpl.lockAndDoSelect(Unknown Source)
    - locked <0x000000008b85ec68> (a sun.nio.ch.Util$2)
    - locked <0x000000008b85ec58> (a java.util.Collections$UnmodifiableSet)
    - locked <0x000000008b85ea00> (a sun.nio.ch.EPollSelectorImpl)
    at sun.nio.ch.SelectorImpl.select(Unknown Source)
    at 
org.apache.zookeeper.server.NIOServerCnxnFactory.run(NIOServerCnxnFactory.java:178)
    at java.lang.Thread.run(Unknown Source)

"PurgeTask" daemon prio=10 tid=0x00007fb620142800 nid=0xeaa in Object.wait() 
[0x00007fb61cde2000]
   java.lang.Thread.State: TIMED_WAITING (on object monitor)
    at java.lang.Object.wait(Native Method)
    - waiting on <0x000000008b863b40> (a java.util.TaskQueue)
    at java.util.TimerThread.mainLoop(Unknown Source)
    - locked <0x000000008b863b40> (a java.util.TaskQueue)
    at java.util.TimerThread.run(Unknown Source)

"RMI TCP Accept-0" daemon prio=10 tid=0x00007fb620123000 nid=0xe39 runnable 
[0x00007fb6244aa000]
   java.lang.Thread.State: RUNNABLE
    at java.net.PlainSocketImpl.socketAccept(Native Method)
    at java.net.PlainSocketImpl.accept(Unknown Source)
    - locked <0x000000008b8640b0> (a java.net.SocksSocketImpl)
    at java.net.ServerSocket.implAccept(Unknown Source)
    at java.net.ServerSocket.accept(Unknown Source)
    at sun.rmi.transport.tcp.TCPTransport$AcceptLoop.executeAcceptLoop(Unknown 
Source)
    at sun.rmi.transport.tcp.TCPTransport$AcceptLoop.run(Unknown Source)
    at java.lang.Thread.run(Unknown Source)

"Low Memory Detector" daemon prio=10 tid=0x00007fb62008a800 nid=0xe38 runnable 
[0x0000000000000000]
   java.lang.Thread.State: RUNNABLE

"C2 CompilerThread1" daemon prio=10 tid=0x00007fb620088800 nid=0xe37 waiting on 
condition [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE

"C2 CompilerThread0" daemon prio=10 tid=0x00007fb620085800 nid=0xe36 waiting on 
condition [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE

"Signal Dispatcher" daemon prio=10 tid=0x00007fb620083800 nid=0xe35 waiting on 
condition [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE

"Finalizer" daemon prio=10 tid=0x00007fb62006a800 nid=0xe31 in Object.wait() 
[0x00007fb624c10000]
   java.lang.Thread.State: WAITING (on object monitor)
    at java.lang.Object.wait(Native Method)
    - waiting on <0x000000008b864868> (a java.lang.ref.ReferenceQueue$Lock)
    at java.lang.ref.ReferenceQueue.remove(Unknown Source)
    - locked <0x000000008b864868> (a java.lang.ref.ReferenceQueue$Lock)
    at java.lang.ref.ReferenceQueue.remove(Unknown Source)
    at java.lang.ref.Finalizer$FinalizerThread.run(Unknown Source)

"Reference Handler" daemon prio=10 tid=0x00007fb620068800 nid=0xe30 in 
Object.wait() [0x00007fb624d11000]
   java.lang.Thread.State: WAITING (on object monitor)
    at java.lang.Object.wait(Native Method)
    - waiting on <0x000000008b863a50> (a java.lang.ref.Reference$Lock)
    at java.lang.Object.wait(Object.java:485)
    at java.lang.ref.Reference$ReferenceHandler.run(Unknown Source)
    - locked <0x000000008b863a50> (a java.lang.ref.Reference$Lock)

"main" prio=10 tid=0x00007fb620008000 nid=0xe2c in Object.wait() 
[0x00007fb626a07000]
   java.lang.Thread.State: WAITING (on object monitor)
    at java.lang.Object.wait(Native Method)
    - waiting on <0x000000008b810990> (a 
org.apache.zookeeper.server.quorum.QuorumPeer)
    at java.lang.Thread.join(Unknown Source)
    - locked <0x000000008b810990> (a 
org.apache.zookeeper.server.quorum.QuorumPeer)
    at java.lang.Thread.join(Unknown Source)
    at 
org.apache.zookeeper.server.quorum.QuorumPeerMain.runFromConfig(QuorumPeerMain.java:152)
    at 
org.apache.zookeeper.server.quorum.QuorumPeerMain.initializeAndRun(QuorumPeerMain.java:111)
    at 
org.apache.zookeeper.server.quorum.QuorumPeerMain.main(QuorumPeerMain.java:78)

"VM Thread" prio=10 tid=0x00007fb620061800 nid=0xe2f runnable

"GC task thread#0 (ParallelGC)" prio=10 tid=0x00007fb62001b000 nid=0xe2d 
runnable

"GC task thread#1 (ParallelGC)" prio=10 tid=0x00007fb62001c800 nid=0xe2e 
runnable

"VM Periodic Task Thread" prio=10 tid=0x00007fb620132000 nid=0xe3a waiting on 
condition

JNI global references: 1530

Heap
 PSYoungGen      total 24448K, used 1510K [0x00000000d92b0000, 
0x00000000db0f0000, 0x0000000100000000)
  eden space 24320K, 5% used 
[0x00000000d92b0000,0x00000000d94118b0,0x00000000daa70000)
  from space 128K, 75% used 
[0x00000000daee0000,0x00000000daef8000,0x00000000daf00000)
  to   space 1088K, 0% used 
[0x00000000dafe0000,0x00000000dafe0000,0x00000000db0f0000)
 PSOldGen        total 79488K, used 626K [0x000000008b800000, 
0x00000000905a0000, 0x00000000d92b0000)
  object space 79488K, 0% used 
[0x000000008b800000,0x000000008b89cae0,0x00000000905a0000)
 PSPermGen       total 21248K, used 10142K [0x0000000086600000, 
0x0000000087ac0000, 0x000000008b800000)
  object space 21248K, 47% used 
[0x0000000086600000,0x0000000086fe78d0,0x0000000087ac0000)
{code}
                
> Server fails to join quorum when a peer is unreachable (5 ZK server setup)
> --------------------------------------------------------------------------
>
>                 Key: ZOOKEEPER-1678
>                 URL: https://issues.apache.org/jira/browse/ZOOKEEPER-1678
>             Project: ZooKeeper
>          Issue Type: Bug
>          Components: leaderElection
>    Affects Versions: 3.4.5
>         Environment: java version "1.6.0_32"
> Java(TM) SE Runtime Environment (build 1.6.0_32-b05)
> Java HotSpot(TM) 64-Bit Server VM (build 20.7-b02, mixed mode)
> Distributor ID:       Ubuntu
> Description:  Ubuntu 12.04.1 LTS
> Release:      12.04
> Codename:     precise
> uname -a Linux ha-vani3-0 3.2.0-23-virtual #36-Ubuntu SMP Tue Apr 10 22:29:03 
> UTC 2012 x86_64 x86_64 x86_64 GNU/Linux
>            Reporter: JL
>
> In a 5-node ZK cluster setup, in the following state:
> * 1 host is down / not reachable.
> * 4 hosts are up.
> * 3 ZK servers are in quorum.
> * a 4th ZK server was restarted and is trying to re-join the quorum.
> The 4th server is not able to rejoin the quorum because the connection to the 
> host that is not reachable cannot be established, and the connection attempt 
> apparently takes too long to time out.
> Stack traces and additional information coming.

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators
For more information on JIRA, see: http://www.atlassian.com/software/jira

Reply via email to