This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new 1d7ea885992 HBASE-28241 The snapshot operation encountered an NPE and 
failed. (#5560)
1d7ea885992 is described below

commit 1d7ea88599244208f79ab8be2ec13a5421622bb9
Author: hiping-tech <58875741+hiping-t...@users.noreply.github.com>
AuthorDate: Tue Dec 12 23:13:01 2023 +0800

    HBASE-28241 The snapshot operation encountered an NPE and failed. (#5560)
    
    Fixed the check for an ongoing Snapshot before proceeding with the 
merge/split region operation.
    
    Co-authored-by: lvhaiping.lhp <lvhaiping....@alibaba-inc.com>
    Signed-off-by: Duo Zhang <zhang...@apache.org>
    Signed-off-by: Hui Ruan <huir...@apache.org>
    (cherry picked from commit 3d117125892ee36e8a66171fba3a223c09bc0b9a)
---
 .../assignment/MergeTableRegionsProcedure.java     |  2 +-
 .../assignment/SplitTableRegionProcedure.java      |  3 +-
 .../assignment/TestMergeTableRegionsProcedure.java | 44 ++++++++++++++++++
 .../assignment/TestSplitTableRegionProcedure.java  | 53 ++++++++++++++++++++++
 .../master/procedure/TestSnapshotProcedure.java    | 24 ++++++++++
 5 files changed, 124 insertions(+), 2 deletions(-)

diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
index 813caa47d33..14f2676d48f 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
@@ -443,7 +443,7 @@ public class MergeTableRegionsProcedure
   private boolean prepareMergeRegion(final MasterProcedureEnv env) throws 
IOException {
     // Fail if we are taking snapshot for the given table
     TableName tn = regionsToMerge[0].getTable();
-    if (env.getMasterServices().getSnapshotManager().isTakingSnapshot(tn)) {
+    if 
(env.getMasterServices().getSnapshotManager().isTableTakingAnySnapshot(tn)) {
       throw new MergeRegionException("Skip merging regions "
         + RegionInfo.getShortNameToLog(regionsToMerge) + ", because we are 
snapshotting " + tn);
     }
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
index a0118cbd7b0..2e2182b25d2 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
@@ -505,7 +505,8 @@ public class SplitTableRegionProcedure
   public boolean prepareSplitRegion(final MasterProcedureEnv env) throws 
IOException {
     // Fail if we are taking snapshot for the given table
     if (
-      
env.getMasterServices().getSnapshotManager().isTakingSnapshot(getParentRegion().getTable())
+      env.getMasterServices().getSnapshotManager()
+        .isTableTakingAnySnapshot(getParentRegion().getTable())
     ) {
       setFailure(new IOException("Skip splitting region " + 
getParentRegion().getShortNameToLog()
         + ", because we are taking snapshot for the table " + 
getParentRegion().getTable()));
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java
index 41267a19373..7547b0421b2 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hbase.master.assignment;
 
+import static 
org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.assertProcFailed;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
@@ -33,16 +34,20 @@ import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.SnapshotDescription;
+import org.apache.hadoop.hbase.client.SnapshotType;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.client.TableDescriptor;
 import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility;
+import org.apache.hadoop.hbase.master.procedure.TestSnapshotProcedure;
 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
 import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
 import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -59,6 +64,9 @@ import org.junit.rules.TestName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
+
 @Category({ MasterTests.class, LargeTests.class })
 public class TestMergeTableRegionsProcedure {
 
@@ -345,6 +353,42 @@ public class TestMergeTableRegionsProcedure {
     assertRegionCount(tableName, initialRegionCount - 1);
   }
 
+  @Test
+  public void testMergingRegionWhileTakingSnapshot() throws Exception {
+    final TableName tableName = 
TableName.valueOf("testMergingRegionWhileTakingSnapshot");
+    final ProcedureExecutor<MasterProcedureEnv> procExec = 
getMasterProcedureExecutor();
+
+    List<RegionInfo> tableRegions = createTable(tableName);
+
+    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
+
+    SnapshotDescription snapshot =
+      new SnapshotDescription("SnapshotProcedureTest", tableName, 
SnapshotType.FLUSH);
+    SnapshotProtos.SnapshotDescription snapshotProto =
+      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshot);
+    snapshotProto = SnapshotDescriptionUtils.validate(snapshotProto,
+      UTIL.getHBaseCluster().getMaster().getConfiguration());
+    long snapshotProcId = procExec.submitProcedure(
+      new 
TestSnapshotProcedure.DelaySnapshotProcedure(procExec.getEnvironment(), 
snapshotProto));
+    
UTIL.getHBaseCluster().getMaster().getSnapshotManager().registerSnapshotProcedure(snapshotProto,
+      snapshotProcId);
+
+    RegionInfo[] regionsToMerge = new RegionInfo[2];
+    regionsToMerge[0] = tableRegions.get(0);
+    regionsToMerge[1] = tableRegions.get(1);
+
+    long mergeProcId = procExec.submitProcedure(
+      new MergeTableRegionsProcedure(procExec.getEnvironment(), 
regionsToMerge, true));
+
+    ProcedureTestingUtility
+      
.waitProcedure(UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), 
mergeProcId);
+    ProcedureTestingUtility.waitProcedure(
+      UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), 
snapshotProcId);
+
+    assertProcFailed(procExec, mergeProcId);
+    assertEquals(initialRegionCount, 
UTIL.getAdmin().getRegions(tableName).size());
+  }
+
   private List<RegionInfo> createTable(final TableName tableName) throws 
Exception {
     TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
       .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build();
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java
index 5cbd53fd7e5..424dd32e1d5 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java
@@ -41,6 +41,8 @@ import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
 import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.SnapshotDescription;
+import org.apache.hadoop.hbase.client.SnapshotType;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.client.TableDescriptor;
 import org.apache.hadoop.hbase.coprocessor.ObserverContext;
@@ -50,10 +52,12 @@ import org.apache.hadoop.hbase.coprocessor.RegionObserver;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility;
+import org.apache.hadoop.hbase.master.procedure.TestSnapshotProcedure;
 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
 import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
 import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -69,6 +73,9 @@ import org.junit.rules.TestName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
+
 @Category({ MasterTests.class, MediumTests.class })
 public class TestSplitTableRegionProcedure {
 
@@ -550,6 +557,52 @@ public class TestSplitTableRegionProcedure {
     verify(tableName, splitRowNum);
   }
 
+  @Test
+  public void testSplitRegionWhileTakingSnapshot() throws Exception {
+    final TableName tableName = TableName.valueOf(name.getMethodName());
+    final ProcedureExecutor<MasterProcedureEnv> procExec = 
getMasterProcedureExecutor();
+
+    RegionInfo[] regions = MasterProcedureTestingUtility.createTable(procExec, 
tableName, null,
+      columnFamilyName1, columnFamilyName2);
+    int splitRowNum = startRowNum + rowCount / 2;
+    byte[] splitKey = Bytes.toBytes("" + splitRowNum);
+
+    assertTrue("not able to find a splittable region", regions != null);
+    assertTrue("not able to find a splittable region", regions.length == 1);
+    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
+
+    // task snapshot
+    SnapshotDescription snapshot =
+      new SnapshotDescription("SnapshotProcedureTest", tableName, 
SnapshotType.FLUSH);
+    SnapshotProtos.SnapshotDescription snapshotProto =
+      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshot);
+    snapshotProto = SnapshotDescriptionUtils.validate(snapshotProto,
+      UTIL.getHBaseCluster().getMaster().getConfiguration());
+    long snapshotProcId = procExec.submitProcedure(
+      new 
TestSnapshotProcedure.DelaySnapshotProcedure(procExec.getEnvironment(), 
snapshotProto));
+    
UTIL.getHBaseCluster().getMaster().getSnapshotManager().registerSnapshotProcedure(snapshotProto,
+      snapshotProcId);
+
+    // collect AM metrics before test
+    collectAssignmentManagerMetrics();
+
+    // Split region of the table
+    long procId = procExec.submitProcedure(
+      new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], 
splitKey));
+    // Wait the completion
+    ProcedureTestingUtility.waitProcedure(procExec, procId);
+    ProcedureTestingUtility.waitProcedure(procExec, snapshotProcId);
+
+    ProcedureTestingUtility.assertProcFailed(procExec, procId);
+    ProcedureTestingUtility.assertProcNotFailed(procExec, snapshotProcId);
+
+    assertTrue(UTIL.getMiniHBaseCluster().getRegions(tableName).size() == 1);
+    assertTrue(UTIL.countRows(tableName) == 0);
+
+    assertEquals(splitSubmittedCount + 1, 
splitProcMetrics.getSubmittedCounter().getCount());
+    assertEquals(splitFailedCount + 1, 
splitProcMetrics.getFailedCounter().getCount());
+  }
+
   private void deleteData(final TableName tableName, final int 
startDeleteRowNum)
     throws IOException, InterruptedException {
     Table t = UTIL.getConnection().getTable(tableName);
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedure.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedure.java
index 4d5eaeb5bfe..8e85490601f 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedure.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedure.java
@@ -17,10 +17,12 @@
  */
 package org.apache.hadoop.hbase.master.procedure;
 
+import static 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotState.SNAPSHOT_SNAPSHOT_ONLINE_REGIONS;
 import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 import java.util.Optional;
+import java.util.concurrent.TimeUnit;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
@@ -51,6 +53,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
 import 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotState;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
 
@@ -70,6 +73,27 @@ public class TestSnapshotProcedure {
   protected SnapshotDescription snapshot;
   protected SnapshotProtos.SnapshotDescription snapshotProto;
 
+  public static final class DelaySnapshotProcedure extends SnapshotProcedure {
+    public DelaySnapshotProcedure() {
+    }
+
+    public DelaySnapshotProcedure(final MasterProcedureEnv env,
+      final SnapshotProtos.SnapshotDescription snapshot) {
+      super(env, snapshot);
+    }
+
+    @Override
+    protected Flow executeFromState(MasterProcedureEnv env,
+      MasterProcedureProtos.SnapshotState state)
+      throws ProcedureSuspendedException, ProcedureYieldException, 
InterruptedException {
+      Flow flow = super.executeFromState(env, state);
+      if (state == SNAPSHOT_SNAPSHOT_ONLINE_REGIONS) {
+        TimeUnit.SECONDS.sleep(20);
+      }
+      return flow;
+    }
+  }
+
   @Before
   public void setup() throws Exception {
     TEST_UTIL = new HBaseTestingUtility();

Reply via email to