This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 300608cfcec47001199cf7b7aefc3ba3ea3a6ed9
Author: chaijunjie0101 <64140218+chaijunjie0...@users.noreply.github.com>
AuthorDate: Sun Apr 21 19:39:32 2024 +0800

    HBASE-28150 CreateTableProcedure and DeleteTableProcedure should sleep a 
while before retrying (#5502)
    
    Signed-off-by: Duo Zhang <zhang...@apache.org>
    (cherry picked from commit 5a404c49504edfef250b248228093823942c5e1b)
---
 .../master/procedure/CreateTableProcedure.java     | 24 +++++-
 .../master/procedure/DeleteTableProcedure.java     | 24 +++++-
 .../BadMasterObserverForCreateDeleteTable.java     | 55 ++++++++++++++
 .../TestCreateDeleteTableProcedureWithRetry.java   | 88 ++++++++++++++++++++++
 .../master/procedure/TestCreateTableProcedure.java |  4 +-
 5 files changed, 190 insertions(+), 5 deletions(-)

diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
index 17998fec7bd..23ad3b42aef 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
@@ -35,12 +35,15 @@ import org.apache.hadoop.hbase.fs.ErasureCodingUtils;
 import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
 import org.apache.hadoop.hbase.master.MasterFileSystem;
 import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
+import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
+import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
 import 
org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
 import 
org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerValidationUtils;
 import org.apache.hadoop.hbase.rsgroup.RSGroupInfo;
 import org.apache.hadoop.hbase.util.CommonFSUtils;
 import org.apache.hadoop.hbase.util.FSTableDescriptors;
 import org.apache.hadoop.hbase.util.ModifyRegionUtils;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -51,6 +54,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
 import 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.CreateTableState;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
 
 @InterfaceAudience.Private
 public class CreateTableProcedure extends 
AbstractStateMachineTableProcedure<CreateTableState> {
@@ -60,6 +64,7 @@ public class CreateTableProcedure extends 
AbstractStateMachineTableProcedure<Cre
 
   private TableDescriptor tableDescriptor;
   private List<RegionInfo> newRegions;
+  private RetryCounter retryCounter;
 
   public CreateTableProcedure() {
     // Required by the Procedure framework to create the procedure on replay
@@ -80,7 +85,7 @@ public class CreateTableProcedure extends 
AbstractStateMachineTableProcedure<Cre
 
   @Override
   protected Flow executeFromState(final MasterProcedureEnv env, final 
CreateTableState state)
-    throws InterruptedException {
+    throws InterruptedException, ProcedureSuspendedException {
     LOG.info("{} execute state={}", this, state);
     try {
       switch (state) {
@@ -131,6 +136,7 @@ public class CreateTableProcedure extends 
AbstractStateMachineTableProcedure<Cre
           break;
         case CREATE_TABLE_POST_OPERATION:
           postCreate(env);
+          retryCounter = null;
           return Flow.NO_MORE_STATE;
         default:
           throw new UnsupportedOperationException("unhandled state=" + state);
@@ -139,12 +145,26 @@ public class CreateTableProcedure extends 
AbstractStateMachineTableProcedure<Cre
       if (isRollbackSupported(state)) {
         setFailure("master-create-table", e);
       } else {
-        LOG.warn("Retriable error trying to create table=" + getTableName() + 
" state=" + state, e);
+        if (retryCounter == null) {
+          retryCounter = 
ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
+        }
+        long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
+        LOG.warn("Retriable error trying to create table={},state={},suspend 
{}secs.",
+          getTableName(), state, backoff / 1000, e);
+        throw suspend(Math.toIntExact(backoff), true);
       }
     }
+    retryCounter = null;
     return Flow.HAS_MORE_STATE;
   }
 
+  @Override
+  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
+    setState(ProcedureProtos.ProcedureState.RUNNABLE);
+    env.getProcedureScheduler().addFront(this);
+    return false;
+  }
+
   @Override
   protected void rollbackState(final MasterProcedureEnv env, final 
CreateTableState state)
     throws IOException {
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
index 80fb5d0534d..8c2f1067c95 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
@@ -41,9 +41,12 @@ import org.apache.hadoop.hbase.master.MasterFileSystem;
 import org.apache.hadoop.hbase.mob.MobConstants;
 import org.apache.hadoop.hbase.mob.MobUtils;
 import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
+import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
+import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
 import org.apache.hadoop.hbase.util.CommonFSUtils;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -54,6 +57,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
 import 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.DeleteTableState;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
 
 @InterfaceAudience.Private
 public class DeleteTableProcedure extends 
AbstractStateMachineTableProcedure<DeleteTableState> {
@@ -61,6 +65,7 @@ public class DeleteTableProcedure extends 
AbstractStateMachineTableProcedure<Del
 
   private List<RegionInfo> regions;
   private TableName tableName;
+  private RetryCounter retryCounter;
 
   public DeleteTableProcedure() {
     // Required by the Procedure framework to create the procedure on replay
@@ -79,7 +84,7 @@ public class DeleteTableProcedure extends 
AbstractStateMachineTableProcedure<Del
 
   @Override
   protected Flow executeFromState(final MasterProcedureEnv env, 
DeleteTableState state)
-    throws InterruptedException {
+    throws InterruptedException, ProcedureSuspendedException {
     if (LOG.isTraceEnabled()) {
       LOG.trace(this + " execute state=" + state);
     }
@@ -124,6 +129,7 @@ public class DeleteTableProcedure extends 
AbstractStateMachineTableProcedure<Del
           break;
         case DELETE_TABLE_POST_OPERATION:
           postDelete(env);
+          retryCounter = null;
           LOG.debug("Finished {}", this);
           return Flow.NO_MORE_STATE;
         default:
@@ -133,12 +139,26 @@ public class DeleteTableProcedure extends 
AbstractStateMachineTableProcedure<Del
       if (isRollbackSupported(state)) {
         setFailure("master-delete-table", e);
       } else {
-        LOG.warn("Retriable error trying to delete table=" + getTableName() + 
" state=" + state, e);
+        if (retryCounter == null) {
+          retryCounter = 
ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
+        }
+        long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
+        LOG.warn("Retriable error trying to delete table={},state={},suspend 
{}secs.",
+          getTableName(), state, backoff / 1000, e);
+        throw suspend(Math.toIntExact(backoff), true);
       }
     }
+    retryCounter = null;
     return Flow.HAS_MORE_STATE;
   }
 
+  @Override
+  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
+    setState(ProcedureProtos.ProcedureState.RUNNABLE);
+    env.getProcedureScheduler().addFront(this);
+    return false;
+  }
+
   @Override
   protected boolean abort(MasterProcedureEnv env) {
     // TODO: Current behavior is: with no rollback and no abort support, 
procedure may get stuck
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/BadMasterObserverForCreateDeleteTable.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/BadMasterObserverForCreateDeleteTable.java
new file mode 100644
index 00000000000..454a24e198a
--- /dev/null
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/BadMasterObserverForCreateDeleteTable.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.coprocessor;
+
+import java.io.IOException;
+import java.util.Optional;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+
+/**
+ * A bad Master Observer to prevent user to create/delete table once.
+ */
+public class BadMasterObserverForCreateDeleteTable implements MasterObserver, 
MasterCoprocessor {
+  private boolean createFailedOnce = false;
+  private boolean deleteFailedOnce = false;
+
+  @Override
+  public void 
postCompletedCreateTableAction(ObserverContext<MasterCoprocessorEnvironment> 
ctx,
+    TableDescriptor desc, RegionInfo[] regions) throws IOException {
+    if (!createFailedOnce && !desc.getTableName().isSystemTable()) {
+      createFailedOnce = true;
+      throw new IOException("execute postCompletedCreateTableAction failed 
once.");
+    }
+  }
+
+  @Override
+  public void 
postCompletedDeleteTableAction(ObserverContext<MasterCoprocessorEnvironment> 
ctx,
+    TableName tableName) throws IOException {
+    if (!deleteFailedOnce && !tableName.isSystemTable()) {
+      deleteFailedOnce = true;
+      throw new IOException("execute postCompletedDeleteTableAction failed 
once.");
+    }
+  }
+
+  @Override
+  public Optional<MasterObserver> getMasterObserver() {
+    return Optional.of(this);
+  }
+}
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateDeleteTableProcedureWithRetry.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateDeleteTableProcedureWithRetry.java
new file mode 100644
index 00000000000..3491aa63984
--- /dev/null
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateDeleteTableProcedureWithRetry.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtil;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import 
org.apache.hadoop.hbase.coprocessor.BadMasterObserverForCreateDeleteTable;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.ModifyRegionUtils;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({ MasterTests.class, MediumTests.class })
+public class TestCreateDeleteTableProcedureWithRetry {
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestCreateDeleteTableProcedureWithRetry.class);
+
+  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
+
+  private static final TableName TABLE_NAME =
+    
TableName.valueOf(TestCreateDeleteTableProcedureWithRetry.class.getSimpleName());
+
+  private static final String CF = "cf";
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    Configuration conf = UTIL.getConfiguration();
+    conf.set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
+      BadMasterObserverForCreateDeleteTable.class.getName());
+    UTIL.startMiniCluster(1);
+  }
+
+  @AfterClass
+  public static void tearDown() throws Exception {
+    UTIL.shutdownMiniCluster();
+  }
+
+  @Test
+  public void testCreateDeleteTableRetry() throws IOException {
+    ProcedureExecutor<MasterProcedureEnv> procExec =
+      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
+    TableDescriptor htd = MasterProcedureTestingUtility.createHTD(TABLE_NAME, 
CF);
+    RegionInfo[] regions = ModifyRegionUtils.createRegionInfos(htd, null);
+    CreateTableProcedure createProc =
+      new CreateTableProcedure(procExec.getEnvironment(), htd, regions);
+    ProcedureTestingUtility.submitAndWait(procExec, createProc);
+    Assert.assertTrue(UTIL.getAdmin().tableExists(TABLE_NAME));
+    
MasterProcedureTestingUtility.validateTableCreation(UTIL.getMiniHBaseCluster().getMaster(),
+      TABLE_NAME, regions, CF);
+
+    UTIL.getAdmin().disableTable(TABLE_NAME);
+    DeleteTableProcedure deleteProc =
+      new DeleteTableProcedure(procExec.getEnvironment(), TABLE_NAME);
+    ProcedureTestingUtility.submitAndWait(procExec, deleteProc);
+    Assert.assertFalse(UTIL.getAdmin().tableExists(TABLE_NAME));
+    
MasterProcedureTestingUtility.validateTableDeletion(UTIL.getMiniHBaseCluster().getMaster(),
+      TABLE_NAME);
+  }
+}
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableProcedure.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableProcedure.java
index bed41f4da86..0bb54cc190e 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableProcedure.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableProcedure.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
 import org.apache.hadoop.hbase.master.MasterFileSystem;
 import org.apache.hadoop.hbase.procedure2.Procedure;
 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
 import 
org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
 import 
org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerForTest;
@@ -244,7 +245,8 @@ public class TestCreateTableProcedure extends 
TestTableDDLProcedureBase {
 
     @Override
     protected Flow executeFromState(MasterProcedureEnv env,
-      MasterProcedureProtos.CreateTableState state) throws 
InterruptedException {
+      MasterProcedureProtos.CreateTableState state)
+      throws InterruptedException, ProcedureSuspendedException {
 
       if (
         !failOnce && state == 
MasterProcedureProtos.CreateTableState.CREATE_TABLE_WRITE_FS_LAYOUT

Reply via email to