Repository: hbase Updated Branches: refs/heads/master 3a75505cf -> 5efa5f6de
HBASE-21330 ReopenTableRegionsProcedure will enter an infinite loop if we schedule a TRSP at the same time Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/5efa5f6d Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/5efa5f6d Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/5efa5f6d Branch: refs/heads/master Commit: 5efa5f6de4388fc79919d977b58a351debcc8555 Parents: 3a75505 Author: Duo Zhang <[email protected]> Authored: Wed Oct 17 18:04:24 2018 +0800 Committer: Duo Zhang <[email protected]> Committed: Thu Oct 18 11:29:12 2018 +0800 ---------------------------------------------------------------------- .../hadoop/hbase/procedure2/Procedure.java | 3 +- .../procedure/ReopenTableRegionsProcedure.java | 27 ++++-- ...ReopenTableRegionsProcedureInfiniteLoop.java | 90 ++++++++++++++++++++ 3 files changed, 114 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/5efa5f6d/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java ---------------------------------------------------------------------- diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java index 0b7e0c0..191a4b0 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java @@ -322,7 +322,8 @@ public abstract class Procedure<TEnvironment> implements Comparable<Procedure<TE * @see #holdLock(Object) * @return true if the procedure has the lock, false otherwise. */ - protected final boolean hasLock() { + @VisibleForTesting + public final boolean hasLock() { return locked; } http://git-wip-us.apache.org/repos/asf/hbase/blob/5efa5f6d/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java index 5b36f30..0634815 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java @@ -70,6 +70,18 @@ public class ReopenTableRegionsProcedure return TableOperationType.REGION_EDIT; } + private boolean canSchedule(MasterProcedureEnv env, HRegionLocation loc) { + if (loc.getSeqNum() < 0) { + return false; + } + RegionStateNode regionNode = + env.getAssignmentManager().getRegionStates().getRegionStateNode(loc.getRegion()); + // If the region node is null, then at least in the next round we can remove this region to make + // progress. And the second condition is a normal one, if there are no TRSP with it then we can + // schedule one to make progress. + return regionNode == null || !regionNode.isInTransition(); + } + @Override protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState state) throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException { @@ -85,8 +97,12 @@ public class ReopenTableRegionsProcedure return Flow.HAS_MORE_STATE; case REOPEN_TABLE_REGIONS_REOPEN_REGIONS: for (HRegionLocation loc : regions) { - RegionStateNode regionNode = env.getAssignmentManager().getRegionStates() - .getOrCreateRegionStateNode(loc.getRegion()); + RegionStateNode regionNode = + env.getAssignmentManager().getRegionStates().getRegionStateNode(loc.getRegion()); + // this possible, maybe the region has already been merged or split, see HBASE-20921 + if (regionNode == null) { + continue; + } TransitRegionStateProcedure proc; regionNode.lock(); try { @@ -108,13 +124,13 @@ public class ReopenTableRegionsProcedure if (regions.isEmpty()) { return Flow.NO_MORE_STATE; } - if (regions.stream().anyMatch(l -> l.getSeqNum() >= 0)) { + if (regions.stream().anyMatch(loc -> canSchedule(env, loc))) { attempt = 0; setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS); return Flow.HAS_MORE_STATE; } - // All the regions need to reopen are in OPENING state which means we can not schedule any - // MRPs. + // We can not schedule TRSP for all the regions need to reopen, wait for a while and retry + // again. long backoff = ProcedureUtil.getBackoffTimeMs(this.attempt++); LOG.info( "There are still {} region(s) which need to be reopened for table {} are in " + @@ -138,6 +154,7 @@ public class ReopenTableRegionsProcedure env.getProcedureScheduler().addFront(this); return false; // 'false' means that this procedure handled the timeout } + @Override protected void rollbackState(MasterProcedureEnv env, ReopenTableRegionsState state) throws IOException, InterruptedException { http://git-wip-us.apache.org/repos/asf/hbase/blob/5efa5f6d/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestReopenTableRegionsProcedureInfiniteLoop.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestReopenTableRegionsProcedureInfiniteLoop.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestReopenTableRegionsProcedureInfiniteLoop.java new file mode 100644 index 0000000..870f3bf --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestReopenTableRegionsProcedureInfiniteLoop.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.procedure; + +import java.io.IOException; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.master.ServerManager; +import org.apache.hadoop.hbase.master.assignment.AssignmentManager; +import org.apache.hadoop.hbase.master.assignment.RegionStateNode; +import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure; +import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Testcase for HBASE-21330. + */ +@Category({ MasterTests.class, MediumTests.class }) +public class TestReopenTableRegionsProcedureInfiniteLoop { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestReopenTableRegionsProcedureInfiniteLoop.class); + + private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); + + private static TableName TABLE_NAME = TableName.valueOf("InfiniteLoop"); + + private static byte[] CF = Bytes.toBytes("cf"); + + @BeforeClass + public static void setUp() throws Exception { + UTIL.getConfiguration().setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1); + UTIL.startMiniCluster(1); + UTIL.createTable(TABLE_NAME, CF); + } + + @AfterClass + public static void tearDown() throws Exception { + UTIL.shutdownMiniCluster(); + } + + @Test + public void testInfiniteLoop() throws IOException { + HMaster master = UTIL.getMiniHBaseCluster().getMaster(); + AssignmentManager am = master.getAssignmentManager(); + ProcedureExecutor<MasterProcedureEnv> exec = master.getMasterProcedureExecutor(); + RegionInfo regionInfo = UTIL.getAdmin().getRegions(TABLE_NAME).get(0); + RegionStateNode regionNode = am.getRegionStates().getRegionStateNode(regionInfo); + long procId; + ReopenTableRegionsProcedure proc = new ReopenTableRegionsProcedure(TABLE_NAME); + regionNode.lock(); + try { + procId = exec.submitProcedure(proc); + UTIL.waitFor(30000, () -> proc.hasLock()); + TransitRegionStateProcedure trsp = + TransitRegionStateProcedure.reopen(exec.getEnvironment(), regionInfo); + regionNode.setProcedure(trsp); + exec.submitProcedure(trsp); + } finally { + regionNode.unlock(); + } + UTIL.waitFor(60000, () -> exec.isFinished(procId)); + } +}
