[
https://issues.apache.org/jira/browse/GEODE-7460?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17058970#comment-17058970
]
Ivan Godwin commented on GEODE-7460:
------------------------------------
Failures in CI with the following output:
{code:java}
org.apache.geode.distributed.DistributedMemberDUnitTest > testGroupsInAllVMs
FAILED org.apache.geode.test.dunit.RMIException: While invoking
org.apache.geode.distributed.DistributedMemberDUnitTest$6.run in VM 0 running
on Host b8a929243fc3 with 4 VMs at
org.apache.geode.test.dunit.VM.executeMethodOnObject(VM.java:610) at
org.apache.geode.test.dunit.VM.invoke(VM.java:437) at
org.apache.geode.distributed.DistributedMemberDUnitTest.testGroupsInAllVMs(DistributedMemberDUnitTest.java:334)
Caused by: org.apache.geode.SystemConnectException: One or more
peers generated exceptions during connection attempt at
org.apache.geode.distributed.internal.ClusterDistributionManager.sendStartupMessage(ClusterDistributionManager.java:1626)
at
org.apache.geode.distributed.internal.ClusterDistributionManager.create(ClusterDistributionManager.java:361)
at
org.apache.geode.distributed.internal.InternalDistributedSystem.initialize(InternalDistributedSystem.java:779)
at
org.apache.geode.distributed.internal.InternalDistributedSystem.access$200(InternalDistributedSystem.java:135)
at
org.apache.geode.distributed.internal.InternalDistributedSystem$Builder.build(InternalDistributedSystem.java:3036)
at
org.apache.geode.distributed.internal.InternalDistributedSystem.connectInternal(InternalDistributedSystem.java:290)
at
org.apache.geode.distributed.internal.InternalDistributedSystem.connectInternal(InternalDistributedSystem.java:216)
at
org.apache.geode.distributed.DistributedSystem.connect(DistributedSystem.java:159)
at
org.apache.geode.test.dunit.internal.JUnit4DistributedTestCase.getSystem(JUnit4DistributedTestCase.java:180)
at
org.apache.geode.distributed.DistributedMemberDUnitTest$6.run(DistributedMemberDUnitTest.java:340)
Caused by:
org.apache.geode.distributed.DistributedSystemDisconnectedException: membership
shutdown, caused by org.apache.geode.ForcedDisconnectException: Exiting due to
possible network partition event due to loss of 1 cache processes:
[172.17.0.16(myName:1)<v18>:41001] at
org.apache.geode.distributed.internal.DistributionImpl.checkCancelled(DistributionImpl.java:311)
at
org.apache.geode.distributed.internal.DistributionImpl.directChannelSend(DistributionImpl.java:358)
at
org.apache.geode.distributed.internal.DistributionImpl.send(DistributionImpl.java:289)
at
org.apache.geode.distributed.internal.ClusterDistributionManager.sendViaMembershipManager(ClusterDistributionManager.java:2058)
at
org.apache.geode.distributed.internal.ClusterDistributionManager.sendOutgoing(ClusterDistributionManager.java:1986)
at
org.apache.geode.distributed.internal.StartupOperation.sendStartupMessage(StartupOperation.java:74)
at
org.apache.geode.distributed.internal.ClusterDistributionManager.sendStartupMessage(ClusterDistributionManager.java:1623)
... 9 more
Caused by:
org.apache.geode.ForcedDisconnectException: Exiting due to possible network
partition event due to loss of 1 cache processes:
[172.17.0.16(myName:1)<v18>:41001] at
org.apache.geode.distributed.internal.DistributionImpl.checkCancelled(DistributionImpl.java:310)
... 15 more
java.lang.AssertionError: Suspicious strings were written to the log during
this run. Fix the strings or use IgnoredException.addIgnoredException to
ignore.
-----------------------------------------------------------------------
Found suspect string in log4j at line 563
[fatal 2020/03/13 04:25:48.675 GMT <unicast receiver,b8a929243fc3-43862>
tid=165] Possible loss of quorum due to the loss of 1 cache processes:
[172.17.0.16(myName:1)<v18>:41001]
-----------------------------------------------------------------------
Found suspect string in log4j at line 565
[fatal 2020/03/13 04:25:48.675 GMT <unicast receiver,b8a929243fc3-43862>
tid=165] Membership service failure: Exiting due to possible network partition
event due to loss of 1 cache processes: [172.17.0.16(myName:1)<v18>:41001]
org.apache.geode.distributed.internal.membership.api.MemberDisconnectedException:
Exiting due to possible network partition event due to loss of 1 cache
processes: [172.17.0.16(myName:1)<v18>:41001] at
org.apache.geode.distributed.internal.membership.gms.GMSMembership$ManagerImpl.forceDisconnect(GMSMembership.java:2007)
at
org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.forceDisconnect(GMSJoinLeave.java:1085)
at
org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.installView(GMSJoinLeave.java:1473)
at
org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.processMessage(GMSJoinLeave.java:1065)
at
org.apache.geode.distributed.internal.membership.gms.messenger.JGroupsMessenger$JGroupsReceiver.receive(JGroupsMessenger.java:1327)
at
org.apache.geode.distributed.internal.membership.gms.messenger.JGroupsMessenger$JGroupsReceiver.receive(JGroupsMessenger.java:1266)
at org.jgroups.JChannel.invokeCallback(JChannel.java:816) at
org.jgroups.JChannel.up(JChannel.java:741) at
org.jgroups.stack.ProtocolStack.up(ProtocolStack.java:1030) at
org.jgroups.protocols.FRAG2.up(FRAG2.java:165) at
org.jgroups.protocols.FlowControl.up(FlowControl.java:390) at
org.jgroups.protocols.UNICAST3.deliverMessage(UNICAST3.java:1077) at
org.jgroups.protocols.UNICAST3.handleDataReceived(UNICAST3.java:792) at
org.jgroups.protocols.UNICAST3.up(UNICAST3.java:433) at
org.apache.geode.distributed.internal.membership.gms.messenger.StatRecorder.up(StatRecorder.java:72)
at
org.apache.geode.distributed.internal.membership.gms.messenger.AddressManager.up(AddressManager.java:70)
at org.jgroups.protocols.TP.passMessageUp(TP.java:1658) at
org.jgroups.protocols.TP$SingleMessageHandler.run(TP.java:1876) at
org.jgroups.util.DirectExecutor.execute(DirectExecutor.java:10) at
org.jgroups.protocols.TP.handleSingleMessage(TP.java:1789) at
org.jgroups.protocols.TP.receive(TP.java:1714) at
org.apache.geode.distributed.internal.membership.gms.messenger.Transport.receive(Transport.java:159)
at org.jgroups.protocols.UDP$PacketReceiver.run(UDP.java:701)
at java.lang.Thread.run(Thread.java:748)
{code}
[https://concourse.apachegeode-ci.info/teams/main/pipelines/apache-mass-test-run-main/jobs/DistributedTestOpenJDK8/builds/1155]
[https://concourse.apachegeode-ci.info/teams/main/pipelines/apache-mass-test-run-main/jobs/DistributedTestOpenJDK8/builds/1106]
[https://concourse.apachegeode-ci.info/teams/main/pipelines/apache-mass-test-run-main/jobs/DistributedTestOpenJDK8/builds/1099]
> CI failure: DistributedMemberDUnitTest.testGroupsInAllVMs Failure
> -----------------------------------------------------------------
>
> Key: GEODE-7460
> URL: https://issues.apache.org/jira/browse/GEODE-7460
> Project: Geode
> Issue Type: Bug
> Components: membership
> Reporter: Robert Houghton
> Assignee: Bill Burcham
> Priority: Major
> Fix For: 1.12.0
>
>
> From the failing job:
> =-=-=-=-=-=-=-=-=-=-=-=-=-=-= Test Results URI =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
> http://files.apachegeode-ci.info/builds/apache-develop-main/1.12.0-SNAPSHOT.0016/test-results/distributedTest/1573784422/
> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
> Test report artifacts from this job are available at:
> http://files.apachegeode-ci.info/builds/apache-develop-main/1.12.0-SNAPSHOT.0016/test-artifacts/1573784422/distributedtestfiles-OpenJDK8-1.12.0-SNAPSHOT.0016.tgz
> DistributedTest failure due to exception:
> org.apache.geode.distributed.DistributedMemberDUnitTest > testGroupsInAllVMs
> FAILED
> org.apache.geode.test.dunit.RMIException: While invoking
> org.apache.geode.distributed.DistributedMemberDUnitTest$6.run in VM 0 running
> on Host 3e09f1029b44 with 4 VMs
> at org.apache.geode.test.dunit.VM.executeMethodOnObject(VM.java:579)
> at org.apache.geode.test.dunit.VM.invoke(VM.java:406)
> at
> org.apache.geode.distributed.DistributedMemberDUnitTest.testGroupsInAllVMs(DistributedMemberDUnitTest.java:333)
> Caused by:
> org.apache.geode.SystemConnectException: One or more peers generated
> exceptions during connection attempt
> at
> org.apache.geode.distributed.internal.ClusterDistributionManager.sendStartupMessage(ClusterDistributionManager.java:1625)
> at
> org.apache.geode.distributed.internal.ClusterDistributionManager.create(ClusterDistributionManager.java:354)
> at
> org.apache.geode.distributed.internal.InternalDistributedSystem.initialize(InternalDistributedSystem.java:759)
> at
> org.apache.geode.distributed.internal.InternalDistributedSystem.access$200(InternalDistributedSystem.java:136)
> at
> org.apache.geode.distributed.internal.InternalDistributedSystem$Builder.build(InternalDistributedSystem.java:3009)
> at
> org.apache.geode.distributed.internal.InternalDistributedSystem.connectInternal(InternalDistributedSystem.java:269)
> at
> org.apache.geode.distributed.DistributedSystem.connect(DistributedSystem.java:159)
> at
> org.apache.geode.test.dunit.internal.JUnit4DistributedTestCase.getSystem(JUnit4DistributedTestCase.java:181)
> at
> org.apache.geode.distributed.DistributedMemberDUnitTest$6.run(DistributedMemberDUnitTest.java:339)
> Caused by:
>
> org.apache.geode.distributed.DistributedSystemDisconnectedException:
> DistributedSystem is shutting down, caused by
> org.apache.geode.ForcedDisconnectException: Exiting due to possible network
> partition event due to loss of 1 cache processes:
> [172.17.0.14(myName:1)<v18>:41001]
> at
> org.apache.geode.distributed.internal.membership.adapter.GMSMembershipManager.directChannelSend(GMSMembershipManager.java:1591)
> at
> org.apache.geode.distributed.internal.membership.adapter.GMSMembershipManager.send(GMSMembershipManager.java:1751)
> at
> org.apache.geode.distributed.internal.ClusterDistributionManager.sendViaMembershipManager(ClusterDistributionManager.java:2058)
> at
> org.apache.geode.distributed.internal.ClusterDistributionManager.sendOutgoing(ClusterDistributionManager.java:1985)
> at
> org.apache.geode.distributed.internal.StartupOperation.sendStartupMessage(StartupOperation.java:74)
> at
> org.apache.geode.distributed.internal.ClusterDistributionManager.sendStartupMessage(ClusterDistributionManager.java:1622)
> ... 8 more
> Caused by:
> org.apache.geode.ForcedDisconnectException: Exiting due to
> possible network partition event due to loss of 1 cache processes:
> [172.17.0.14(myName:1)<v18>:41001]
> java.lang.AssertionError: Suspicious strings were written to the log
> during this run.
> Fix the strings or use IgnoredException.addIgnoredException to ignore.
> -----------------------------------------------------------------------
> Found suspect string in log4j at line 554
> [fatal 2019/11/15 00:40:19.902 GMT <unicast receiver,3e09f1029b44-61225>
> tid=165] Possible loss of quorum due to the loss of 1 cache processes:
> [172.17.0.14(myName:1)<v18>:41001]
> -----------------------------------------------------------------------
> Found suspect string in log4j at line 556
> [fatal 2019/11/15 00:40:19.903 GMT <unicast receiver,3e09f1029b44-61225>
> tid=165] Membership service failure: Exiting due to possible network
> partition event due to loss of 1 cache processes:
> [172.17.0.14(myName:1)<v18>:41001]
> org.apache.geode.ForcedDisconnectException: Exiting due to possible
> network partition event due to loss of 1 cache processes:
> [172.17.0.14(myName:1)<v18>:41001]
> at
> org.apache.geode.distributed.internal.membership.adapter.GMSMembershipManager$ManagerImpl.forceDisconnect(GMSMembershipManager.java:2586)
> at
> org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.forceDisconnect(GMSJoinLeave.java:1073)
> at
> org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.installView(GMSJoinLeave.java:1500)
> at
> org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.processMessage(GMSJoinLeave.java:1053)
> at
> org.apache.geode.distributed.internal.membership.gms.messenger.JGroupsMessenger$JGroupsReceiver.receive(JGroupsMessenger.java:1330)
> at
> org.apache.geode.distributed.internal.membership.gms.messenger.JGroupsMessenger$JGroupsReceiver.receive(JGroupsMessenger.java:1269)
> at org.jgroups.JChannel.invokeCallback(JChannel.java:816)
> at org.jgroups.JChannel.up(JChannel.java:741)
> at org.jgroups.stack.ProtocolStack.up(ProtocolStack.java:1030)
> at org.jgroups.protocols.FRAG2.up(FRAG2.java:165)
> at org.jgroups.protocols.FlowControl.up(FlowControl.java:390)
> at org.jgroups.protocols.UNICAST3.deliverMessage(UNICAST3.java:1077)
> at org.jgroups.protocols.UNICAST3.handleDataReceived(UNICAST3.java:792)
> at org.jgroups.protocols.UNICAST3.up(UNICAST3.java:433)
> at
> org.apache.geode.distributed.internal.membership.gms.messenger.StatRecorder.up(StatRecorder.java:73)
> at
> org.apache.geode.distributed.internal.membership.gms.messenger.AddressManager.up(AddressManager.java:72)
> at org.jgroups.protocols.TP.passMessageUp(TP.java:1658)
> at org.jgroups.protocols.TP$SingleMessageHandler.run(TP.java:1876)
> at org.jgroups.util.DirectExecutor.execute(DirectExecutor.java:10)
> at org.jgroups.protocols.TP.handleSingleMessage(TP.java:1789)
> at org.jgroups.protocols.TP.receive(TP.java:1714)
> at
> org.apache.geode.distributed.internal.membership.gms.messenger.Transport.receive(Transport.java:152)
> at org.jgroups.protocols.UDP$PacketReceiver.run(UDP.java:701)
> at java.lang.Thread.run(Thread.java:748)
--
This message was sent by Atlassian Jira
(v8.3.4#803005)