jvarenina commented on a change in pull request #6646:
URL: https://github.com/apache/geode/pull/6646#discussion_r661204574



##########
File path: 
geode-wan/src/distributedTest/java/org/apache/geode/internal/cache/wan/parallel/ParallelGatewaySenderAndCQDurableClientDUnitTest.java
##########
@@ -0,0 +1,530 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more 
contributor license
+ * agreements. See the NOTICE file distributed with this work for additional 
information regarding
+ * copyright ownership. The ASF licenses this file to You under the Apache 
License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the 
License. You may obtain a
+ * copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software 
distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 
KIND, either express
+ * or implied. See the License for the specific language governing permissions 
and limitations under
+ * the License.
+ */
+package org.apache.geode.internal.cache.wan.parallel;
+
+import static 
org.apache.geode.distributed.ConfigurationProperties.DISTRIBUTED_SYSTEM_ID;
+import static 
org.apache.geode.distributed.ConfigurationProperties.DURABLE_CLIENT_ID;
+import static 
org.apache.geode.distributed.ConfigurationProperties.REMOTE_LOCATORS;
+import static 
org.apache.geode.internal.cache.wan.wancommand.WANCommandUtils.getMember;
+import static 
org.apache.geode.internal.cache.wan.wancommand.WANCommandUtils.validateGatewaySenderMXBeanProxy;
+import static 
org.apache.geode.internal.cache.wan.wancommand.WANCommandUtils.verifySenderState;
+import static org.apache.geode.test.awaitility.GeodeAwaitility.await;
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Objects;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.BlockingQueue;
+
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import org.apache.geode.cache.CacheTransactionManager;
+import org.apache.geode.cache.Region;
+import org.apache.geode.cache.query.CqAttributesFactory;
+import org.apache.geode.cache.query.CqEvent;
+import org.apache.geode.cache.query.CqListener;
+import org.apache.geode.cache.query.CqQuery;
+import org.apache.geode.cache.query.QueryService;
+import org.apache.geode.cache.wan.GatewaySender;
+import org.apache.geode.internal.cache.InitialImageOperation;
+import org.apache.geode.internal.cache.InternalCache;
+import org.apache.geode.internal.cache.PartitionedRegion;
+import org.apache.geode.internal.cache.RegionQueue;
+import org.apache.geode.internal.cache.wan.AbstractGatewaySender;
+import org.apache.geode.internal.cache.wan.GatewaySenderEventImpl;
+import org.apache.geode.management.internal.cli.util.CommandStringBuilder;
+import org.apache.geode.management.internal.i18n.CliStrings;
+import org.apache.geode.test.dunit.rules.ClientVM;
+import org.apache.geode.test.dunit.rules.ClusterStartupRule;
+import org.apache.geode.test.dunit.rules.MemberVM;
+import org.apache.geode.test.junit.categories.WanTest;
+import org.apache.geode.test.junit.rules.GfshCommandRule;
+
+@Category({WanTest.class})
+public class ParallelGatewaySenderAndCQDurableClientDUnitTest implements 
Serializable {
+
+  @Rule
+  public ClusterStartupRule clusterStartupRule = new ClusterStartupRule(9);
+
+  @Rule
+  public transient GfshCommandRule gfsh = new GfshCommandRule();
+
+  private static CqListenerTestReceivedEvents cqListener;
+
+  private MemberVM locatorSite2;
+  private MemberVM server1Site2;
+  private MemberVM server2Site2;
+  private MemberVM server3Site2;
+
+  private ClientVM clientSite2;
+  private ClientVM clientSite2DurableSubscription;
+
+  public static boolean IS_TEMP_QUEUE_USED = false;
+  public static boolean IS_HOOK_TRIGGERED = false;
+
+  private static final String DISTRIBUTED_SYSTEM_ID_SITE1 = "1";
+  private static final String DISTRIBUTED_SYSTEM_ID_SITE2 = "2";
+  private static final String REGION_NAME = "test1";
+  private static final String REGION_COLOCATED = "colocatedRegion";
+  private static final String REGION_CQ = "SELECT * FROM " + Region.SEPARATOR 
+ REGION_NAME;
+  private static final String COLO_REGION_CQ =
+      "SELECT * FROM " + Region.SEPARATOR + REGION_COLOCATED;
+
+  /**
+   * Verifies the scenario in which events could be wrongly stored in bucketToTempQueueMap.
+   *
+   * The issue reproduces when all of the following conditions are fulfilled:
+   * - A redundant partitioned region is configured
+   * - The number of servers is greater than the number of redundant copies of the
+   * partitioned region
+   * - A parallel gateway sender is configured on the partitioned region
+   * - A client has registered CQs for the region
+   * - Put operations are performed inside transactions
+   * - Events are enqueued in the parallel gateway senders (the remote site is unavailable)
+   *
+   * The server hosting the primary bucket sends a TXCommitMessage to the server hosting the
+   * secondary bucket, and also to the server hosting the CQ subscription queue (if the CQ
+   * condition is fulfilled). The problem is that the server hosting the CQ subscription queue
+   * does not host the bucket for which the event is actually intended. In this case the server
+   * stores the event in bucketToTempQueueMap because it assumes that the bucket is in the
+   * process of being created, which is not correct.
+   *
+   * After the transactional puts, the test calls validateBucketToTempQueueMap("ln", true) on all
+   * three site-2 servers (presumably asserting that bucketToTempQueueMap is empty; confirm
+   * against the helper) and then checks the CQ events received by the durable client.
+   */
+  @Test
+  public void testSubscriptionQueueWan() throws Exception {
+    configureSites("113", false);
+    startDurableClient();
+    createDurableCQs(REGION_CQ);
+
+    verifyGatewaySenderState(true, false);
+
+    // Do transactional puts with range arguments (0, 30) and check that one event per returned key has been enqueued in "ln"
+    Set<String> keysQueue =
+        clientSite2.invoke(() -> doPutsInRangeTransactionSingleKey(0, 30, 
REGION_NAME));
+    server1Site2.invoke(() -> checkQueueSize("ln", keysQueue.size()));
+
+    server1Site2.invoke(() -> validateBucketToTempQueueMap("ln", true));
+    server2Site2.invoke(() -> validateBucketToTempQueueMap("ln", true));
+    server3Site2.invoke(() -> validateBucketToTempQueueMap("ln", true));
+
+    // Check that the durable client has received all events (expected count: one per returned key)
+    checkCqEvents(keysQueue.size());
+  }
+
+  @Test
+  public void testSubscriptionQueueWanColocatedRegionsMultipleOperations() 
throws Exception {
+    configureSites("113", true);
+    startDurableClient();
+    createDurableCQs(REGION_CQ, COLO_REGION_CQ);
+
+    verifyGatewaySenderState(true, false);
+
+    // Do transactional puts with range arguments (0, 30) and check that two events per returned key have been enqueued in "ln"
+    Set<String> keysQueue =
+        clientSite2.invoke(() -> 
doPutsInRangeTransactionWithTwoPutOperation(0, 30));
+    server1Site2.invoke(() -> checkQueueSize("ln", keysQueue.size() * 2));
+
+    server1Site2.invoke(() -> validateBucketToTempQueueMap("ln", true));
+    server2Site2.invoke(() -> validateBucketToTempQueueMap("ln", true));
+    server3Site2.invoke(() -> validateBucketToTempQueueMap("ln", true));
+
+    // Check that the durable client has received all events (expected count: two per returned key)
+    checkCqEvents(keysQueue.size() * 2);
+  }
+
+  /**
+   * This test case verifies that, during bucket recovery, the server enqueues all events
+   * intended for that bucket in a temporary queue, and that after bucket redundancy is
+   * restored the events are transferred from the temporary queue to the real bucket queue.
+   */
+  @Test
+  public void testSubscriptionQueueWanTrafficWhileRebalanced() throws 
Exception {
+    configureSites("3", false);
+    verifyGatewaySenderState(true, false);
+
+    List<MemberVM> allMembers = new ArrayList<>();
+    allMembers.add(server1Site2);
+    allMembers.add(server2Site2);
+    allMembers.add(server3Site2);
+
+    // Do some puts so that all bucket are created
+    Set<String> keysQueue = clientSite2.invoke(() -> doPutsInRange(0, 100));
+    server1Site2.invoke(() -> checkQueueSize("ln", keysQueue.size()));
+
+    // check that bucketToTempQueueMap is empty on all members
+    for (MemberVM member : allMembers) {
+      member.invoke(() -> validateBucketToTempQueueMap("ln", true));
+    }
+
+    // Choose server with bucket to be stopped
+    MemberVM serverToStop = getServerToStop(allMembers);
+    int bucketId = 
serverToStop.invoke(this::getPrimaryBucketList).iterator().next();
+    serverToStop.stop(true);
+
+    // remove from list the member that has been previously stopped
+    allMembers.remove(serverToStop);
+
+    startDurableClient();
+    createDurableCQs("SELECT * FROM " + Region.SEPARATOR + REGION_NAME);
+
+    // configure hook on running members
+    for (MemberVM member : allMembers) {
+      configureHooksOnRunningMember(member, bucketId);
+    }
+
+    // perform rebalance operation to trigger bucket redundancy recovery on 
running servers
+    String command = new CommandStringBuilder(CliStrings.REBALANCE)
+        .getCommandString();
+    gfsh.executeAndAssertThat(command).statusIsSuccess();
+
+    for (MemberVM member : allMembers) {
+      // All members after redundancy is recovered should have empty temporary 
queue
+      member.invoke(() -> validateBucketToTempQueueMap("ln", true));
+      // If hook has been triggered on member, then check if member 
temporarily queued
+      // events while getting initial image from primary server
+      if 
(member.invoke(ParallelGatewaySenderAndCQDurableClientDUnitTest::isHookTriggered))
 {

Review comment:
       Hi @DonalEvans ,
   
   Thanks for the review!
   
   The hook **will always be triggered** on at least one of the servers that 
restore bucket redundancy. The hook is installed on all running servers and 
then the rebalance command is triggered. This code loops through all servers 
to find the ones that actually restored redundancy (those on which the hook 
fired, i.e. isHookTriggered == true), and then verifies that events were stored 
in the temporary gateway sender queue during recovery of bucket redundancy. So 
I don't think that this test case could be flaky, since the hook will always be 
triggered on at least one server.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to