[
https://issues.apache.org/jira/browse/GEODE-1403?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15293985#comment-15293985
]
Jianxia Chen commented on GEODE-1403:
-------------------------------------
This is highly likely a hardware or OS issue, not a product or test bug.
This is not a test for member join/leave. However, every single VM in the test
becomes a suspect member at some point during the test, as indicated in the
log. This is unexpected test behavior. The log contains lots of suspect-member
messages and final checks. Eventually three members were kicked out of the
distributed system, including the accessor that reports the error in this
ticket. Once the accessor is force-disconnected, a null region as in the
ticket's stack trace is not a surprise.
By the way, the machine that runs the CI has experienced problems recently
(e.g. slow to respond) and has been rebooted twice within a week.
> CI Failure: PartitionRegionHelperDUnitTest.testMembersForKey
> ------------------------------------------------------------
>
> Key: GEODE-1403
> URL: https://issues.apache.org/jira/browse/GEODE-1403
> Project: Geode
> Issue Type: Bug
> Components: membership
> Reporter: Sai Boorlagadda
> Labels: ci
> Attachments: geode-1403-test.log.gz
>
>
> {noformat}
> Error Message
> com.gemstone.gemfire.test.dunit.RMIException: While invoking
> com.gemstone.gemfire.cache.partition.PartitionRegionHelperDUnitTest$14.call
> in VM 0 running on Host rooktwo.gemstone.com with 4 VMs
> Stacktrace
> com.gemstone.gemfire.test.dunit.RMIException: While invoking
> com.gemstone.gemfire.cache.partition.PartitionRegionHelperDUnitTest$14.call
> in VM 0 running on Host rooktwo.gemstone.com with 4 VMs
> at com.gemstone.gemfire.test.dunit.VM.invoke(VM.java:389)
> at com.gemstone.gemfire.test.dunit.VM.invoke(VM.java:355)
> at com.gemstone.gemfire.test.dunit.VM.invoke(VM.java:320)
> at
> com.gemstone.gemfire.cache.partition.PartitionRegionHelperDUnitTest.testMembersForKey(PartitionRegionHelperDUnitTest.java:382)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:497)
> at junit.framework.TestCase.runTest(TestCase.java:176)
> at junit.framework.TestCase.runBare(TestCase.java:141)
> at junit.framework.TestResult$1.protect(TestResult.java:122)
> at junit.framework.TestResult.runProtected(TestResult.java:142)
> at junit.framework.TestResult.run(TestResult.java:125)
> at junit.framework.TestCase.run(TestCase.java:129)
> at junit.framework.TestSuite.runTest(TestSuite.java:252)
> at junit.framework.TestSuite.run(TestSuite.java:247)
> at
> org.junit.internal.runners.JUnit38ClassRunner.run(JUnit38ClassRunner.java:86)
> at
> org.gradle.api.internal.tasks.testing.junit.JUnitTestClassExecuter.runTestClass(JUnitTestClassExecuter.java:112)
> at
> org.gradle.api.internal.tasks.testing.junit.JUnitTestClassExecuter.execute(JUnitTestClassExecuter.java:56)
> at
> org.gradle.api.internal.tasks.testing.junit.JUnitTestClassProcessor.processTestClass(JUnitTestClassProcessor.java:66)
> at
> org.gradle.api.internal.tasks.testing.SuiteTestClassProcessor.processTestClass(SuiteTestClassProcessor.java:51)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:497)
> at
> org.gradle.messaging.dispatch.ReflectionDispatch.dispatch(ReflectionDispatch.java:35)
> at
> org.gradle.messaging.dispatch.ReflectionDispatch.dispatch(ReflectionDispatch.java:24)
> at
> org.gradle.messaging.dispatch.ContextClassLoaderDispatch.dispatch(ContextClassLoaderDispatch.java:32)
> at
> org.gradle.messaging.dispatch.ProxyDispatchAdapter$DispatchingInvocationHandler.invoke(ProxyDispatchAdapter.java:93)
> at com.sun.proxy.$Proxy2.processTestClass(Unknown Source)
> at
> org.gradle.api.internal.tasks.testing.worker.TestWorker.processTestClass(TestWorker.java:109)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:497)
> at
> org.gradle.messaging.dispatch.ReflectionDispatch.dispatch(ReflectionDispatch.java:35)
> at
> org.gradle.messaging.dispatch.ReflectionDispatch.dispatch(ReflectionDispatch.java:24)
> at
> org.gradle.messaging.remote.internal.hub.MessageHub$Handler.run(MessageHub.java:360)
> at
> org.gradle.internal.concurrent.ExecutorPolicy$CatchAndRecordFailures.onExecute(ExecutorPolicy.java:54)
> at
> org.gradle.internal.concurrent.StoppableExecutorImpl$1.run(StoppableExecutorImpl.java:40)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> at java.lang.Thread.run(Thread.java:745)
> Caused by: java.lang.IllegalArgumentException: Argument 'Region' is null
> at
> com.gemstone.gemfire.cache.partition.PartitionRegionHelper.isPartitionedRegion(PartitionRegionHelper.java:128)
> at
> com.gemstone.gemfire.cache.partition.PartitionRegionHelper.isPartitionedCheck(PartitionRegionHelper.java:142)
> at
> com.gemstone.gemfire.cache.partition.PartitionRegionHelper.getPrimaryMemberForKey(PartitionRegionHelper.java:269)
> at
> com.gemstone.gemfire.cache.partition.PartitionRegionHelperDUnitTest$14.call(PartitionRegionHelperDUnitTest.java:388)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:497)
> at hydra.MethExecutor.executeObject(MethExecutor.java:268)
> at
> com.gemstone.gemfire.test.dunit.standalone.RemoteDUnitVM.executeMethodOnObject(RemoteDUnitVM.java:82)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:497)
> at sun.rmi.server.UnicastServerRef.dispatch(UnicastServerRef.java:323)
> at sun.rmi.transport.Transport$1.run(Transport.java:200)
> at sun.rmi.transport.Transport$1.run(Transport.java:197)
> at java.security.AccessController.doPrivileged(Native Method)
> at sun.rmi.transport.Transport.serviceCall(Transport.java:196)
> at
> sun.rmi.transport.tcp.TCPTransport.handleMessages(TCPTransport.java:568)
> at
> sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run0(TCPTransport.java:826)
> at
> sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.lambda$run$95(TCPTransport.java:683)
> at java.security.AccessController.doPrivileged(Native Method)
> at
> sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run(TCPTransport.java:682)
> ... 3 more
> Standard Error
> [vm_0]java.lang.IllegalStateException: Target member does not exist or is not
> a data store for the partitioned region /region1: rooktwo(26588)<ec>:5
> [vm_0]java.lang.IllegalStateException: Source member does not exist or is not
> a data store for the partitioned region /region1: rooktwo(26588)<ec>:5
> [vm_0]java.lang.IllegalStateException: Target member is the same as source
> member for the partitioned region /region1: rooktwo(26588)<ec><v1>:1025
> Suspicious strings were written to the log during this run.
> Fix the strings or use IgnoredException.addIgnoredException to ignore.
> -----------------------------------------------------------------------
> Found suspect string in log4j at line 1998
> [fatal 2016/05/17 02:50:05.304 PDT <Geode Membership View Creator> tid=0x27]
> Possible loss of quorum due to the loss of 3 cache processes:
> [rooktwo(26601)<ec><v9>:1027, rooktwo(26588)<ec><v7>:1025,
> rooktwo(26592)<ec><v8>:1026]
> -----------------------------------------------------------------------
> Found suspect string in log4j at line 2005
> [fatal 2016/05/17 02:50:05.304 PDT <unicast receiver,rooktwo-30494> tid=0x5f]
> Membership service failure: Failed to acknowledge a new membership view and
> then failed tcp/ip connection attempt
> com.gemstone.gemfire.ForcedDisconnectException: Failed to acknowledge a new
> membership view and then failed tcp/ip connection attempt
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.mgr.GMSMembershipManager.forceDisconnect(GMSMembershipManager.java:2586)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.membership.GMSJoinLeave.forceDisconnect(GMSJoinLeave.java:917)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.membership.GMSJoinLeave.processRemoveRequest(GMSJoinLeave.java:592)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.membership.GMSJoinLeave.processMessage(GMSJoinLeave.java:1554)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.messenger.JGroupsMessenger$JGroupsReceiver.receive(JGroupsMessenger.java:1098)
> at org.jgroups.JChannel.invokeCallback(JChannel.java:816)
> at org.jgroups.JChannel.up(JChannel.java:741)
> at org.jgroups.stack.ProtocolStack.up(ProtocolStack.java:1030)
> at org.jgroups.protocols.FRAG2.up(FRAG2.java:165)
> at org.jgroups.protocols.FlowControl.up(FlowControl.java:392)
> at org.jgroups.protocols.UNICAST3.deliverMessage(UNICAST3.java:1064)
> at org.jgroups.protocols.UNICAST3.handleDataReceived(UNICAST3.java:779)
> at org.jgroups.protocols.UNICAST3.up(UNICAST3.java:426)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.messenger.StatRecorder.up(StatRecorder.java:75)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.messenger.AddressManager.up(AddressManager.java:75)
> at org.jgroups.protocols.TP.passMessageUp(TP.java:1567)
> at org.jgroups.protocols.TP$SingleMessageHandler.run(TP.java:1783)
> at org.jgroups.util.DirectExecutor.execute(DirectExecutor.java:10)
> at org.jgroups.protocols.TP.handleSingleMessage(TP.java:1695)
> at org.jgroups.protocols.TP.receive(TP.java:1620)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.messenger.Transport.receive(Transport.java:160)
> at org.jgroups.protocols.UDP$PacketReceiver.run(UDP.java:701)
> at java.lang.Thread.run(Thread.java:745)
> -----------------------------------------------------------------------
> Found suspect string in log4j at line 2066
> [fatal 2016/05/17 02:50:05.357 PDT <unicast receiver,rooktwo-5450> tid=0x20]
> Possible loss of quorum due to the loss of 3 cache processes:
> [rooktwo(26601)<ec><v9>:1027, rooktwo(26588)<ec><v7>:1025,
> rooktwo(26592)<ec><v8>:1026]
> -----------------------------------------------------------------------
> Found suspect string in log4j at line 2130
> [fatal 2016/05/17 02:50:05.644 PDT <unicast receiver,rooktwo-46883> tid=0x68]
> Membership service failure: Member isn't responding to heartbeat requests
> com.gemstone.gemfire.ForcedDisconnectException: Member isn't responding to
> heartbeat requests
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.mgr.GMSMembershipManager.forceDisconnect(GMSMembershipManager.java:2586)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.membership.GMSJoinLeave.forceDisconnect(GMSJoinLeave.java:917)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.membership.GMSJoinLeave.processRemoveRequest(GMSJoinLeave.java:592)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.membership.GMSJoinLeave.processMessage(GMSJoinLeave.java:1554)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.messenger.JGroupsMessenger$JGroupsReceiver.receive(JGroupsMessenger.java:1098)
> at org.jgroups.JChannel.invokeCallback(JChannel.java:816)
> at org.jgroups.JChannel.up(JChannel.java:741)
> at org.jgroups.stack.ProtocolStack.up(ProtocolStack.java:1030)
> at org.jgroups.protocols.FRAG2.up(FRAG2.java:165)
> at org.jgroups.protocols.FlowControl.up(FlowControl.java:392)
> at org.jgroups.protocols.UNICAST3.deliverMessage(UNICAST3.java:1064)
> at org.jgroups.protocols.UNICAST3.handleDataReceived(UNICAST3.java:779)
> at org.jgroups.protocols.UNICAST3.up(UNICAST3.java:426)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.messenger.StatRecorder.up(StatRecorder.java:75)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.messenger.AddressManager.up(AddressManager.java:75)
> at org.jgroups.protocols.TP.passMessageUp(TP.java:1567)
> at org.jgroups.protocols.TP$SingleMessageHandler.run(TP.java:1783)
> at org.jgroups.util.DirectExecutor.execute(DirectExecutor.java:10)
> at org.jgroups.protocols.TP.handleSingleMessage(TP.java:1695)
> at org.jgroups.protocols.TP.receive(TP.java:1620)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.messenger.Transport.receive(Transport.java:160)
> at org.jgroups.protocols.UDP$PacketReceiver.run(UDP.java:701)
> at java.lang.Thread.run(Thread.java:745)
> -----------------------------------------------------------------------
> Found suspect string in log4j at line 2186
> [fatal 2016/05/17 02:50:05.307 PDT <unicast receiver,rooktwo-36097> tid=0x63]
> Membership service failure: Member isn't responding to heartbeat requests
> com.gemstone.gemfire.ForcedDisconnectException: Member isn't responding to
> heartbeat requests
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.mgr.GMSMembershipManager.forceDisconnect(GMSMembershipManager.java:2586)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.membership.GMSJoinLeave.forceDisconnect(GMSJoinLeave.java:917)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.membership.GMSJoinLeave.processRemoveRequest(GMSJoinLeave.java:592)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.membership.GMSJoinLeave.processMessage(GMSJoinLeave.java:1554)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.messenger.JGroupsMessenger$JGroupsReceiver.receive(JGroupsMessenger.java:1098)
> at org.jgroups.JChannel.invokeCallback(JChannel.java:816)
> at org.jgroups.JChannel.up(JChannel.java:741)
> at org.jgroups.stack.ProtocolStack.up(ProtocolStack.java:1030)
> at org.jgroups.protocols.FRAG2.up(FRAG2.java:165)
> at org.jgroups.protocols.FlowControl.up(FlowControl.java:392)
> at org.jgroups.protocols.UNICAST3.deliverMessage(UNICAST3.java:1064)
> at org.jgroups.protocols.UNICAST3.handleDataReceived(UNICAST3.java:779)
> at org.jgroups.protocols.UNICAST3.up(UNICAST3.java:426)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.messenger.StatRecorder.up(StatRecorder.java:75)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.messenger.AddressManager.up(AddressManager.java:75)
> at org.jgroups.protocols.TP.passMessageUp(TP.java:1567)
> at org.jgroups.protocols.TP$SingleMessageHandler.run(TP.java:1783)
> at org.jgroups.util.DirectExecutor.execute(DirectExecutor.java:10)
> at org.jgroups.protocols.TP.handleSingleMessage(TP.java:1695)
> at org.jgroups.protocols.TP.receive(TP.java:1620)
> at
> com.gemstone.gemfire.distributed.internal.membership.gms.messenger.Transport.receive(Transport.java:160)
> at org.jgroups.protocols.UDP$PacketReceiver.run(UDP.java:701)
> at java.lang.Thread.run(Thread.java:745)
> [vm_0]java.lang.IllegalStateException: Unable to move bucket for /region1.
> Target member rooktwo(26592)<ec><v20>:1026 is already hosting bucket 1
> [vm_0]java.lang.IllegalStateException: The bucket for key 1, bucket 1, region
> /region1 is not hosted by rooktwo(26601)<ec><v21>:1027. Members hosting:
> [Member(id=rooktwo(26588)<ec><v19>:1025),
> Member(id=rooktwo(26592)<ec><v20>:1026)]
> [vm_0]java.lang.IllegalStateException: Target member does not exist or is not
> a data store for the partitioned region /region1: rooktwo(26588)<ec>:5
> [vm_0]java.lang.IllegalStateException: The bucket for key 10, bucket 10,
> region /region1 does not exist
> {noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)