This is an automated email from the ASF dual-hosted git repository. zhangduo pushed a commit to branch branch-2 in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push: new 1d7ea885992 HBASE-28241 The snapshot operation encountered an NPE and failed. (#5560) 1d7ea885992 is described below commit 1d7ea88599244208f79ab8be2ec13a5421622bb9 Author: hiping-tech <58875741+hiping-t...@users.noreply.github.com> AuthorDate: Tue Dec 12 23:13:01 2023 +0800 HBASE-28241 The snapshot operation encountered an NPE and failed. (#5560) Fixed the check for an ongoing Snapshot before proceeding with the merge/split region operation. Co-authored-by: lvhaiping.lhp <lvhaiping....@alibaba-inc.com> Signed-off-by: Duo Zhang <zhang...@apache.org> Signed-off-by: Hui Ruan <huir...@apache.org> (cherry picked from commit 3d117125892ee36e8a66171fba3a223c09bc0b9a) --- .../assignment/MergeTableRegionsProcedure.java | 2 +- .../assignment/SplitTableRegionProcedure.java | 3 +- .../assignment/TestMergeTableRegionsProcedure.java | 44 ++++++++++++++++++ .../assignment/TestSplitTableRegionProcedure.java | 53 ++++++++++++++++++++++ .../master/procedure/TestSnapshotProcedure.java | 24 ++++++++++ 5 files changed, 124 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java index 813caa47d33..14f2676d48f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java @@ -443,7 +443,7 @@ public class MergeTableRegionsProcedure private boolean prepareMergeRegion(final MasterProcedureEnv env) throws IOException { // Fail if we are taking snapshot for the given table TableName tn = regionsToMerge[0].getTable(); - if (env.getMasterServices().getSnapshotManager().isTakingSnapshot(tn)) { + if (env.getMasterServices().getSnapshotManager().isTableTakingAnySnapshot(tn)) { throw new MergeRegionException("Skip merging regions " + RegionInfo.getShortNameToLog(regionsToMerge) + ", because we are snapshotting " + tn); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java index a0118cbd7b0..2e2182b25d2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java @@ -505,7 +505,8 @@ public class SplitTableRegionProcedure public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException { // Fail if we are taking snapshot for the given table if ( - env.getMasterServices().getSnapshotManager().isTakingSnapshot(getParentRegion().getTable()) + env.getMasterServices().getSnapshotManager() + .isTableTakingAnySnapshot(getParentRegion().getTable()) ) { setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog() + ", because we are taking snapshot for the table " + getParentRegion().getTable())); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java index 41267a19373..7547b0421b2 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hbase.master.assignment; +import static org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.assertProcFailed; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -33,16 +34,20 @@ import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.SnapshotDescription; +import org.apache.hadoop.hbase.client.SnapshotType; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.client.TableDescriptorBuilder; import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility; +import org.apache.hadoop.hbase.master.procedure.TestSnapshotProcedure; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.util.Bytes; @@ -59,6 +64,9 @@ import org.junit.rules.TestName; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; + @Category({ MasterTests.class, LargeTests.class }) public class TestMergeTableRegionsProcedure { @@ -345,6 +353,42 @@ public class TestMergeTableRegionsProcedure { assertRegionCount(tableName, initialRegionCount - 1); } + @Test + public void testMergingRegionWhileTakingSnapshot() throws Exception { + final TableName tableName = TableName.valueOf("testMergingRegionWhileTakingSnapshot"); + final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); + + List<RegionInfo> tableRegions = createTable(tableName); + + ProcedureTestingUtility.waitNoProcedureRunning(procExec); + + SnapshotDescription snapshot = + new SnapshotDescription("SnapshotProcedureTest", tableName, SnapshotType.FLUSH); + SnapshotProtos.SnapshotDescription snapshotProto = + ProtobufUtil.createHBaseProtosSnapshotDesc(snapshot); + snapshotProto = SnapshotDescriptionUtils.validate(snapshotProto, + UTIL.getHBaseCluster().getMaster().getConfiguration()); + long snapshotProcId = procExec.submitProcedure( + new TestSnapshotProcedure.DelaySnapshotProcedure(procExec.getEnvironment(), snapshotProto)); + UTIL.getHBaseCluster().getMaster().getSnapshotManager().registerSnapshotProcedure(snapshotProto, + snapshotProcId); + + RegionInfo[] regionsToMerge = new RegionInfo[2]; + regionsToMerge[0] = tableRegions.get(0); + regionsToMerge[1] = tableRegions.get(1); + + long mergeProcId = procExec.submitProcedure( + new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true)); + + ProcedureTestingUtility + .waitProcedure(UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), mergeProcId); + ProcedureTestingUtility.waitProcedure( + UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), snapshotProcId); + + assertProcFailed(procExec, mergeProcId); + assertEquals(initialRegionCount, UTIL.getAdmin().getRegions(tableName).size()); + } + private List<RegionInfo> createTable(final TableName tableName) throws Exception { TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName) .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java index 5cbd53fd7e5..424dd32e1d5 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java @@ -41,6 +41,8 @@ import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.SnapshotDescription; +import org.apache.hadoop.hbase.client.SnapshotType; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.coprocessor.ObserverContext; @@ -50,10 +52,12 @@ import org.apache.hadoop.hbase.coprocessor.RegionObserver; import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility; +import org.apache.hadoop.hbase.master.procedure.TestSnapshotProcedure; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; @@ -69,6 +73,9 @@ import org.junit.rules.TestName; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; + @Category({ MasterTests.class, MediumTests.class }) public class TestSplitTableRegionProcedure { @@ -550,6 +557,52 @@ public class TestSplitTableRegionProcedure { verify(tableName, splitRowNum); } + @Test + public void testSplitRegionWhileTakingSnapshot() throws Exception { + final TableName tableName = TableName.valueOf(name.getMethodName()); + final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); + + RegionInfo[] regions = MasterProcedureTestingUtility.createTable(procExec, tableName, null, + columnFamilyName1, columnFamilyName2); + int splitRowNum = startRowNum + rowCount / 2; + byte[] splitKey = Bytes.toBytes("" + splitRowNum); + + assertTrue("not able to find a splittable region", regions != null); + assertTrue("not able to find a splittable region", regions.length == 1); + ProcedureTestingUtility.waitNoProcedureRunning(procExec); + + // task snapshot + SnapshotDescription snapshot = + new SnapshotDescription("SnapshotProcedureTest", tableName, SnapshotType.FLUSH); + SnapshotProtos.SnapshotDescription snapshotProto = + ProtobufUtil.createHBaseProtosSnapshotDesc(snapshot); + snapshotProto = SnapshotDescriptionUtils.validate(snapshotProto, + UTIL.getHBaseCluster().getMaster().getConfiguration()); + long snapshotProcId = procExec.submitProcedure( + new TestSnapshotProcedure.DelaySnapshotProcedure(procExec.getEnvironment(), snapshotProto)); + UTIL.getHBaseCluster().getMaster().getSnapshotManager().registerSnapshotProcedure(snapshotProto, + snapshotProcId); + + // collect AM metrics before test + collectAssignmentManagerMetrics(); + + // Split region of the table + long procId = procExec.submitProcedure( + new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey)); + // Wait the completion + ProcedureTestingUtility.waitProcedure(procExec, procId); + ProcedureTestingUtility.waitProcedure(procExec, snapshotProcId); + + ProcedureTestingUtility.assertProcFailed(procExec, procId); + ProcedureTestingUtility.assertProcNotFailed(procExec, snapshotProcId); + + assertTrue(UTIL.getMiniHBaseCluster().getRegions(tableName).size() == 1); + assertTrue(UTIL.countRows(tableName) == 0); + + assertEquals(splitSubmittedCount + 1, splitProcMetrics.getSubmittedCounter().getCount()); + assertEquals(splitFailedCount + 1, splitProcMetrics.getFailedCounter().getCount()); + } + private void deleteData(final TableName tableName, final int startDeleteRowNum) throws IOException, InterruptedException { Table t = UTIL.getConnection().getTable(tableName); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedure.java index 4d5eaeb5bfe..8e85490601f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedure.java @@ -17,10 +17,12 @@ */ package org.apache.hadoop.hbase.master.procedure; +import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotState.SNAPSHOT_SNAPSHOT_ONLINE_REGIONS; import static org.junit.Assert.assertTrue; import java.io.IOException; import java.util.Optional; +import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; @@ -51,6 +53,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotState; import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; @@ -70,6 +73,27 @@ public class TestSnapshotProcedure { protected SnapshotDescription snapshot; protected SnapshotProtos.SnapshotDescription snapshotProto; + public static final class DelaySnapshotProcedure extends SnapshotProcedure { + public DelaySnapshotProcedure() { + } + + public DelaySnapshotProcedure(final MasterProcedureEnv env, + final SnapshotProtos.SnapshotDescription snapshot) { + super(env, snapshot); + } + + @Override + protected Flow executeFromState(MasterProcedureEnv env, + MasterProcedureProtos.SnapshotState state) + throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException { + Flow flow = super.executeFromState(env, state); + if (state == SNAPSHOT_SNAPSHOT_ONLINE_REGIONS) { + TimeUnit.SECONDS.sleep(20); + } + return flow; + } + } + @Before public void setup() throws Exception { TEST_UTIL = new HBaseTestingUtility();