This is an automated email from the ASF dual-hosted git repository.

bschuchardt pushed a commit to branch feature/GEODE-5385
in repository https://gitbox.apache.org/repos/asf/geode.git


The following commit(s) were added to refs/heads/feature/GEODE-5385 by this 
push:
     new 86b7bd9  GEODE-5385: hang trying to create a bucket
86b7bd9 is described below

commit 86b7bd950a21065cab15973261231298b52d1f6f
Author: Bruce Schuchardt <[email protected]>
AuthorDate: Thu Jul 5 15:10:32 2018 -0700

    GEODE-5385: hang trying to create a bucket
    
    We now look for a ForceReattemptException when destroying a partitioned
    region.  This prevents a region ID skew that can occur if another node
    is still initializing its region and is not yet ready to destroy it.
    
    I've reenabled the PRSanityCheckMessage that watches for skews like this
    and reports them.  This used to be enabled by default but somehow was
    disabled a long time ago.
---
 .../cache/DestroyPartitionedRegionMessage.java     | 30 ++++++++--
 .../geode/internal/cache/PartitionedRegion.java    | 27 ++++++---
 .../cache/partitioned/PRSanityCheckMessage.java    |  2 +-
 .../cache/partitioned/PartitionMessage.java        | 28 +++++++---
 .../geode/internal/i18n/LocalizedStrings.java      |  6 +-
 .../cache/PartitionedRegionDestroyJUnitTest.java   | 65 ++++++++++++++++++++++
 6 files changed, 134 insertions(+), 24 deletions(-)

diff --git 
a/geode-core/src/main/java/org/apache/geode/internal/cache/DestroyPartitionedRegionMessage.java
 
b/geode-core/src/main/java/org/apache/geode/internal/cache/DestroyPartitionedRegionMessage.java
index 6f5ec79..26808fa 100755
--- 
a/geode-core/src/main/java/org/apache/geode/internal/cache/DestroyPartitionedRegionMessage.java
+++ 
b/geode-core/src/main/java/org/apache/geode/internal/cache/DestroyPartitionedRegionMessage.java
@@ -27,6 +27,7 @@ import org.apache.geode.cache.CacheException;
 import org.apache.geode.cache.Operation;
 import org.apache.geode.distributed.internal.ClusterDistributionManager;
 import org.apache.geode.distributed.internal.DistributionAdvisor;
+import org.apache.geode.distributed.internal.DistributionManager;
 import org.apache.geode.distributed.internal.InternalDistributedSystem;
 import org.apache.geode.distributed.internal.ReplyException;
 import org.apache.geode.distributed.internal.ReplyMessage;
@@ -35,6 +36,7 @@ import org.apache.geode.internal.Assert;
 import org.apache.geode.internal.cache.partitioned.PartitionMessage;
 import org.apache.geode.internal.cache.partitioned.RegionAdvisor;
 import 
org.apache.geode.internal.cache.partitioned.RegionAdvisor.PartitionProfile;
+import org.apache.geode.internal.i18n.LocalizedStrings;
 import org.apache.geode.internal.logging.LogService;
 import org.apache.geode.internal.logging.log4j.LogMarker;
 
@@ -112,8 +114,25 @@ public class DestroyPartitionedRegionMessage extends 
PartitionMessage {
   }
 
   @Override
+  protected Throwable processCheckForPR(PartitionedRegion pr,
+      DistributionManager distributionManager) {
+    if (pr != null && !pr.getDistributionAdvisor().isInitialized()) {
+      Throwable thr = new ForceReattemptException(
+          
LocalizedStrings.PartitionMessage_0_COULD_NOT_FIND_PARTITIONED_REGION_WITH_ID_1
+              
.toLocalizedString(distributionManager.getDistributionManagerId(),
+                  pr.getRegionIdentifier()));
+      return thr; // reply sent in finally block below
+    }
+    return null;
+  }
+
+
+  @Override
   protected boolean operateOnPartitionedRegion(ClusterDistributionManager dm, 
PartitionedRegion r,
       long startTime) throws CacheException {
+    if (r == null) {
+      return true;
+    }
     if (this.op.isLocal()) {
       // notify the advisor that the sending member has locally destroyed (or 
closed) the region
 
@@ -219,13 +238,14 @@ public class DestroyPartitionedRegionMessage extends 
PartitionMessage {
       super(system, initMembers);
     }
 
-    /**
-     * Ignore any incoming exception from other VMs, we just want an 
acknowledgement that the
-     * message was processed.
-     */
     @Override
     protected void processException(ReplyException ex) {
-      if (logger.isDebugEnabled()) {
+      // retry on ForceReattempt in case the region is still being initialized
+      if (ex.getRootCause() instanceof ForceReattemptException) {
+        super.processException(ex);
+      }
+      // other errors are ignored
+      else if (logger.isDebugEnabled()) {
         logger.debug("DestroyRegionResponse ignoring exception", ex);
       }
     }
diff --git 
a/geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegion.java
 
b/geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegion.java
index 99aa3f0..da3fcad 100755
--- 
a/geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegion.java
+++ 
b/geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegion.java
@@ -919,7 +919,7 @@ public class PartitionedRegion extends LocalRegion
   }
 
   private void createAndValidatePersistentConfig() {
-    DiskStoreImpl dsi = this.getDiskStore();
+    DiskStoreImpl diskStore = this.getDiskStore();
     if (this.getDataPolicy().withPersistence() && 
!this.getConcurrencyChecksEnabled()
         && supportsConcurrencyChecks()) {
       logger.info(LocalizedMessage.create(
@@ -927,9 +927,9 @@ public class PartitionedRegion extends LocalRegion
           this.getFullPath()));
       this.setConcurrencyChecksEnabled(true);
     }
-    if (dsi != null && this.getDataPolicy().withPersistence()) {
+    if (diskStore != null && this.getDataPolicy().withPersistence()) {
       String colocatedWith = colocatedWithRegion == null ? "" : 
colocatedWithRegion.getFullPath();
-      PRPersistentConfig config = 
dsi.getPersistentPRConfig(this.getFullPath());
+      PRPersistentConfig config = 
diskStore.getPersistentPRConfig(this.getFullPath());
       if (config != null) {
         if (config.getTotalNumBuckets() != this.getTotalNumberOfBuckets()) {
           Object[] prms = new Object[] {this.getFullPath(), 
this.getTotalNumberOfBuckets(),
@@ -947,8 +947,8 @@ public class PartitionedRegion extends LocalRegion
           DiskAccessException dae = new DiskAccessException(
               
LocalizedStrings.LocalRegion_A_DISKACCESSEXCEPTION_HAS_OCCURRED_WHILE_WRITING_TO_THE_DISK_FOR_REGION_0_THE_REGION_WILL_BE_CLOSED
                   .toLocalizedString(this.getFullPath()),
-              null, dsi);
-          dsi.handleDiskAccessException(dae);
+              null, diskStore);
+          diskStore.handleDiskAccessException(dae);
           throw new IllegalStateException(
               
LocalizedStrings.PartitionedRegion_FOR_REGION_0_ColocatedWith_1_SHOULD_NOT_BE_CHANGED_Previous_Configured_2
                   .toString(prms));
@@ -956,13 +956,13 @@ public class PartitionedRegion extends LocalRegion
       } else {
 
         config = new PRPersistentConfig(this.getTotalNumberOfBuckets(), 
colocatedWith);
-        dsi.addPersistentPR(this.getFullPath(), config);
+        diskStore.addPersistentPR(this.getFullPath(), config);
         // Fix for support issue 7870 - the parent region needs to be able
         // to discover that there is a persistent colocated child region. So
         // if this is a child region, persist its config to the parent disk 
store
         // as well.
         if (colocatedWithRegion != null && colocatedWithRegion.getDiskStore() 
!= null
-            && colocatedWithRegion.getDiskStore() != dsi) {
+            && colocatedWithRegion.getDiskStore() != diskStore) {
           
colocatedWithRegion.getDiskStore().addPersistentPR(this.getFullPath(), config);
         }
       }
@@ -7448,6 +7448,13 @@ public class PartitionedRegion extends LocalRegion
    * @see GemFireCacheImpl#close()
    */
   private void sendDestroyRegionMessage(RegionEventImpl event, int serials[]) {
+    boolean retry = true;
+    while (retry) {
+      retry = attemptToSendDestroyRegionMessage(event, serials);
+    }
+  }
+
+  private boolean attemptToSendDestroyRegionMessage(RegionEventImpl event, int 
serials[]) {
     if (this.prRoot == null) {
       if (logger.isDebugEnabled()) {
         logger.debug(
@@ -7455,7 +7462,7 @@ public class PartitionedRegion extends LocalRegion
             this);
       }
       new UpdateAttributesProcessor(this, true).distribute(false);
-      return;
+      return false;
     }
     final HashSet configRecipients = new 
HashSet(getRegionAdvisor().adviseAllPRNodes());
 
@@ -7484,11 +7491,15 @@ public class PartitionedRegion extends LocalRegion
           DestroyPartitionedRegionMessage.send(configRecipients, this, event, 
serials);
       resp.waitForRepliesUninterruptibly();
     } catch (ReplyException e) {
+      if (e.getRootCause() instanceof ForceReattemptException) {
+        return true;
+      }
       logger.warn(
           LocalizedMessage.create(
               
LocalizedStrings.PartitionedRegion_PARTITIONEDREGION_SENDDESTROYREGIONMESSAGE_CAUGHT_EXCEPTION_DURING_DESTROYREGIONMESSAGE_SEND_AND_WAITING_FOR_RESPONSE),
           e);
     }
+    return false;
   }
 
   /**
diff --git 
a/geode-core/src/main/java/org/apache/geode/internal/cache/partitioned/PRSanityCheckMessage.java
 
b/geode-core/src/main/java/org/apache/geode/internal/cache/partitioned/PRSanityCheckMessage.java
index 7b3038a..5906f08 100755
--- 
a/geode-core/src/main/java/org/apache/geode/internal/cache/partitioned/PRSanityCheckMessage.java
+++ 
b/geode-core/src/main/java/org/apache/geode/internal/cache/partitioned/PRSanityCheckMessage.java
@@ -95,7 +95,7 @@ public class PRSanityCheckMessage extends PartitionMessage {
    * gemfire.PRSanityCheckEnabled=true.
    */
   public static void schedule(final PartitionedRegion pr) {
-    if (Boolean.getBoolean(DistributionConfig.GEMFIRE_PREFIX + 
"PRSanityCheckEnabled")) {
+    if (!Boolean.getBoolean(DistributionConfig.GEMFIRE_PREFIX + 
"PRSanityCheckDisabled")) {
       final DistributionManager dm = pr.getDistributionManager();
       // RegionAdvisor ra = pr.getRegionAdvisor();
       // final Set recipients = ra.adviseAllPRNodes();
diff --git 
a/geode-core/src/main/java/org/apache/geode/internal/cache/partitioned/PartitionMessage.java
 
b/geode-core/src/main/java/org/apache/geode/internal/cache/partitioned/PartitionMessage.java
index dbc8eff..5b1f459 100644
--- 
a/geode-core/src/main/java/org/apache/geode/internal/cache/partitioned/PartitionMessage.java
+++ 
b/geode-core/src/main/java/org/apache/geode/internal/cache/partitioned/PartitionMessage.java
@@ -310,13 +310,10 @@ public abstract class PartitionMessage extends 
DistributionMessage
         return;
       }
       pr = getPartitionedRegion();
-      if ((pr == null || !pr.getDistributionAdvisor().isInitialized()) && 
failIfRegionMissing()) {
-        // if the distributed system is disconnecting, don't send a reply 
saying
-        // the partitioned region can't be found (bug 36585)
-        thr = new ForceReattemptException(
-            
LocalizedStrings.PartitionMessage_0_COULD_NOT_FIND_PARTITIONED_REGION_WITH_ID_1
-                .toLocalizedString(dm.getDistributionManagerId(), regionId));
-        return; // reply sent in finally block below
+      Throwable forcedReattempt = processCheckForPR(pr, dm);
+      if (forcedReattempt != null) {
+        thr = forcedReattempt;
+        return;
       }
 
       if (pr != null) {
@@ -420,6 +417,23 @@ public abstract class PartitionMessage extends 
DistributionMessage
   }
 
   /**
+   * If the PR is missing or isn't ready for use we may want to return a
+   * ForceReattemptException to have the sender retry after a bit
+   */
+  protected Throwable processCheckForPR(PartitionedRegion pr,
+      DistributionManager distributionManager) {
+    if ((pr == null || !pr.getDistributionAdvisor().isInitialized()) && 
failIfRegionMissing()) {
+      // if the distributed system is disconnecting, don't send a reply saying
+      // the partitioned region can't be found (bug 36585)
+      Throwable thr = new ForceReattemptException(
+          
LocalizedStrings.PartitionMessage_0_COULD_NOT_FIND_PARTITIONED_REGION_WITH_ID_1
+              
.toLocalizedString(distributionManager.getDistributionManagerId(), regionId));
+      return thr; // reply sent in finally block below
+    }
+    return null;
+  }
+
+  /**
    * Send a generic ReplyMessage. This is in a method so that subclasses can 
override the reply
    * message type
    *
diff --git 
a/geode-core/src/main/java/org/apache/geode/internal/i18n/LocalizedStrings.java 
b/geode-core/src/main/java/org/apache/geode/internal/i18n/LocalizedStrings.java
index 9cb45c4..4121c02 100755
--- 
a/geode-core/src/main/java/org/apache/geode/internal/i18n/LocalizedStrings.java
+++ 
b/geode-core/src/main/java/org/apache/geode/internal/i18n/LocalizedStrings.java
@@ -1330,7 +1330,7 @@ public class LocalizedStrings {
   public static final StringId 
AbstractDistributionConfig_CLIENT_CONFLATION_PROP_NAME =
       new StringId(1839, "Client override for server queue conflation 
setting");
   public static final StringId 
PRHARRedundancyProvider_ALLOCATE_ENOUGH_MEMBERS_TO_HOST_BUCKET =
-      new StringId(1840, "allocate enough members to host bucket.");
+      new StringId(1840, "allocate enough members to host a new bucket");
   public static final StringId 
PRHARedundancyProvider_TIME_OUT_WAITING_0_MS_FOR_CREATION_OF_BUCKET_FOR_PARTITIONED_REGION_1_MEMBERS_REQUESTED_TO_CREATE_THE_BUCKET_ARE_2
 =
       new StringId(1841,
           "Time out waiting {0} ms for creation of bucket for partitioned 
region {1}. Members requested to create the bucket are: {2}");
@@ -1358,7 +1358,7 @@ public class LocalizedStrings {
       new StringId(1852, "Excpetion  in bucket index creation : {0}");
   public static final StringId 
PRHARRedundancyProvider_CONFIGURED_REDUNDANCY_LEVEL_COULD_NOT_BE_SATISFIED_0 =
       new StringId(1853,
-          "Configured Redundancy Level Could Not be Satisfied. {0} to satisfy 
redundancy for the region.{1}");
+          "Configured redundancy level could not be satisfied. {0} to satisfy 
redundancy for the region.{1}");
   public static final StringId 
PartitionedRegionDataStore_PARTITIONEDREGION_0_CAUGHT_UNEXPECTED_EXCEPTION_DURING_CLEANUP
 =
       new StringId(1854, "PartitionedRegion {0}: caught unexpected exception 
during data cleanup");
   public static final StringId 
MemberFunctionExecutor_NO_MEMBER_FOUND_FOR_EXECUTING_FUNCTION_0 =
@@ -3355,7 +3355,7 @@ public class LocalizedStrings {
   public static final StringId 
MemberMessage_MEMBERRESPONSE_GOT_MEMBERDEPARTED_EVENT_FOR_0_CRASHED_1 =
       new StringId(3033, "MemberResponse got memberDeparted event for < {0} > 
crashed =  {1}");
   public static final StringId 
PartitionMessage_0_COULD_NOT_FIND_PARTITIONED_REGION_WITH_ID_1 =
-      new StringId(3034, "{0} : could not find partitioned region with Id  
{1}");
+      new StringId(3034, "{0} : could not find partitioned region with Id 
{1}");
   public static final StringId PartitionMessage_ATTEMPT_FAILED =
       new StringId(3035, "Attempt failed");
   public static final StringId 
PartitionMessage_DISTRIBUTED_SYSTEM_IS_DISCONNECTING =
diff --git 
a/geode-core/src/test/java/org/apache/geode/internal/cache/PartitionedRegionDestroyJUnitTest.java
 
b/geode-core/src/test/java/org/apache/geode/internal/cache/PartitionedRegionDestroyJUnitTest.java
new file mode 100644
index 0000000..690fbf0
--- /dev/null
+++ 
b/geode-core/src/test/java/org/apache/geode/internal/cache/PartitionedRegionDestroyJUnitTest.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more 
contributor license
+ * agreements. See the NOTICE file distributed with this work for additional 
information regarding
+ * copyright ownership. The ASF licenses this file to You under the Apache 
License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the 
License. You may obtain a
+ * copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software 
distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 
KIND, either express
+ * or implied. See the License for the specific language governing permissions 
and limitations under
+ * the License.
+ */
+package org.apache.geode.internal.cache;
+
+import static org.apache.geode.internal.Assert.assertTrue;
+import static org.junit.Assert.assertNull;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import org.apache.geode.distributed.internal.DistributionAdvisor;
+import org.apache.geode.distributed.internal.DistributionManager;
+import 
org.apache.geode.distributed.internal.membership.InternalDistributedMember;
+import org.apache.geode.test.junit.categories.UnitTest;
+
+@Category(UnitTest.class)
+public class PartitionedRegionDestroyJUnitTest {
+
+  private PartitionedRegion partitionedRegion;
+  private DistributionAdvisor advisor;
+  private DistributionManager manager;
+
+  @Before
+  public void setup() {
+    manager = mock(DistributionManager.class);
+    when(manager.getDistributionManagerId())
+        .thenReturn(new InternalDistributedMember("localhost", 1));
+
+    partitionedRegion = mock(PartitionedRegion.class);
+    advisor = mock(DistributionAdvisor.class);
+    when(partitionedRegion.getDistributionAdvisor()).thenReturn(advisor);
+    when(partitionedRegion.getRegionIdentifier()).thenReturn("testRegion");
+  }
+
+  @Test
+  public void destroyMessageRequiresReattemptIfRegionInitializing() {
+    when(advisor.isInitialized()).thenReturn(Boolean.FALSE);
+    DestroyPartitionedRegionMessage message = new 
DestroyPartitionedRegionMessage();
+    Throwable exception = message.processCheckForPR(partitionedRegion, 
manager);
+    assertTrue(exception instanceof ForceReattemptException);
+  }
+
+  @Test
+  public void destroyMessageRequiresNoReattemptIfRegionInitialized() {
+    when(advisor.isInitialized()).thenReturn(Boolean.TRUE);
+    DestroyPartitionedRegionMessage message = new 
DestroyPartitionedRegionMessage();
+    Throwable exception = message.processCheckForPR(partitionedRegion, 
manager);
+    assertNull(exception);
+  }
+}

Reply via email to