This is an automated email from the ASF dual-hosted git repository.
zhangduo pushed a commit to branch branch-2.5
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.5 by this push:
new b127ab08c7f HBASE-28158 Decouple RIT list management from TRSP (#7450)
b127ab08c7f is described below
commit b127ab08c7fb14ca37e37168bc56fbb390e068f0
Author: Umesh <[email protected]>
AuthorDate: Mon Dec 1 21:32:54 2025 +0530
HBASE-28158 Decouple RIT list management from TRSP (#7450)
Co-authored-by: ukumawat <[email protected]>
Signed-off-by: Andrew Purtell <[email protected]>
Signed-off-by: Duo Zhang <[email protected]>
---
.../hadoop/hbase/rsgroup/RSGroupAdminServer.java | 17 +--
.../hbase/rsgroup/TestRSGroupsOfflineMode.java | 2 +-
.../tmpl/master/AssignmentManagerStatusTmpl.jamon | 3 +-
.../org/apache/hadoop/hbase/master/DeadServer.java | 9 ++
.../org/apache/hadoop/hbase/master/HMaster.java | 20 +--
.../hadoop/hbase/master/RegionServerTracker.java | 26 ++--
.../apache/hadoop/hbase/master/ServerManager.java | 9 +-
.../hbase/master/assignment/AssignmentManager.java | 105 ++++++++++++--
.../master/assignment/AssignmentManagerUtil.java | 8 +-
.../assignment/RegionInTransitionTracker.java | 159 +++++++++++++++++++++
.../hbase/master/assignment/RegionStateNode.java | 22 +--
.../hbase/master/assignment/RegionStates.java | 95 ++----------
.../hbase/master/procedure/ProcedureSyncWait.java | 2 +-
.../procedure/ReopenTableRegionsProcedure.java | 2 +-
.../master/procedure/ServerCrashProcedure.java | 4 +
.../apache/hadoop/hbase/HBaseTestingUtility.java | 48 ++++++-
.../hbase/client/TestAsyncRegionAdminApi.java | 4 +-
.../hbase/client/TestSplitOrMergeStatus.java | 4 +-
.../hbase/master/TestAssignmentManagerMetrics.java | 2 +-
.../apache/hadoop/hbase/master/TestDeadServer.java | 3 +-
.../hbase/master/TestMasterBalanceThrottling.java | 8 +-
.../hbase/master/TestMasterDryRunBalancer.java | 2 +-
.../master/assignment/AssignmentTestingUtil.java | 4 +-
.../master/assignment/MockMasterServices.java | 1 +
.../assignment/TestAssignmentManagerBase.java | 2 +-
.../assignment/TestAssignmentManagerUtil.java | 2 +-
.../TestTransitRegionStateProcedure.java | 6 +-
.../TestFavoredStochasticLoadBalancer.java | 15 +-
.../hbase/master/http/TestMasterStatusServlet.java | 2 +-
.../janitor/TestCatalogJanitorInMemoryStates.java | 9 +-
.../procedure/MasterProcedureTestingUtility.java | 1 +
.../hbase/regionserver/TestHRegionFileSystem.java | 6 +-
.../TestRegionMergeTransactionOnCluster.java | 17 +--
.../TestSplitTransactionOnCluster.java | 15 +-
34 files changed, 438 insertions(+), 196 deletions(-)
diff --git
a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java
b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java
index 9d1b03e18eb..2a97668e0f9 100644
---
a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java
+++
b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java
@@ -26,6 +26,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.SortedSet;
import java.util.concurrent.Future;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
@@ -47,7 +48,6 @@ import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.master.TableStateManager;
-import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait;
import org.apache.hadoop.hbase.net.Address;
@@ -532,8 +532,7 @@ public class RSGroupAdminServer implements RSGroupAdmin {
Map<String, RegionState> groupRIT =
rsGroupGetRegionsInTransition(groupName);
if (groupRIT.size() > 0 && !request.isIgnoreRegionsInTransition()) {
LOG.debug("Not running balancer because {} region(s) in transition:
{}", groupRIT.size(),
- StringUtils.abbreviate(
-
master.getAssignmentManager().getRegionStates().getRegionsInTransition().toString(),
+
StringUtils.abbreviate(master.getAssignmentManager().getRegionsInTransition().toString(),
256));
return responseBuilder.build();
}
@@ -654,14 +653,12 @@ public class RSGroupAdminServer implements RSGroupAdmin {
private Map<String, RegionState> rsGroupGetRegionsInTransition(String
groupName)
throws IOException {
+ SortedSet<TableName> tablesInGroup = getRSGroupInfo(groupName).getTables();
Map<String, RegionState> rit = Maps.newTreeMap();
- AssignmentManager am = master.getAssignmentManager();
- for (TableName tableName : getRSGroupInfo(groupName).getTables()) {
- for (RegionInfo regionInfo :
am.getRegionStates().getRegionsOfTable(tableName)) {
- RegionState state =
am.getRegionStates().getRegionTransitionState(regionInfo);
- if (state != null) {
- rit.put(regionInfo.getEncodedName(), state);
- }
+ for (RegionStateNode regionNode :
master.getAssignmentManager().getRegionsInTransition()) {
+ TableName tn = regionNode.getTable();
+ if (tablesInGroup.contains(tn)) {
+ rit.put(regionNode.getRegionInfo().getEncodedName(),
regionNode.toRegionState());
}
}
return rit;
diff --git
a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsOfflineMode.java
b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsOfflineMode.java
index 452fd63d3c8..0d1810101a3 100644
---
a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsOfflineMode.java
+++
b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsOfflineMode.java
@@ -138,7 +138,7 @@ public class TestRSGroupsOfflineMode {
@Override
public boolean evaluate() throws Exception {
return groupRS.getNumberOfOnlineRegions() < 1
- &&
master.getAssignmentManager().getRegionStates().getRegionsInTransitionCount() <
1;
+ && master.getAssignmentManager().getRegionsInTransitionCount() < 1;
}
});
// Move table to group and wait.
diff --git
a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
index ee899a7340d..8a7ca8baf7d 100644
---
a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
+++
b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
@@ -40,8 +40,7 @@ int limit = 100;
</%args>
<%java>
-SortedSet<RegionState> rit = assignmentManager.getRegionStates()
- .getRegionsInTransitionOrderedByTimestamp();
+SortedSet<RegionState> rit = assignmentManager.getRegionsStateInTransition();
</%java>
<%if !rit.isEmpty() %>
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
index 84d660e66ee..d572946211a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
@@ -115,6 +115,10 @@ public class DeadServer {
}
}
+ synchronized void putIfAbsent(ServerName sn, long crashedTime) {
+ this.deadServers.putIfAbsent(sn, crashedTime);
+ }
+
public synchronized int size() {
return deadServers.size();
}
@@ -214,6 +218,11 @@ public class DeadServer {
return time == null ? null : new Date(time);
}
+ public synchronized long getDeathTimestamp(final ServerName deadServerName) {
+ Long time = deadServers.get(deadServerName);
+ return time == null ? 0 : time;
+ }
+
/**
* Called from rpc by operator cleaning up deadserver list.
* @param deadServerName the dead server name
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 1f221b71445..982bbe3a7e1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -992,10 +992,11 @@ public class HMaster extends HRegionServer implements
MasterServices {
// TODO: Generate the splitting and live Set in one pass instead of two as
we currently do.
this.regionServerTracker.upgrade(
procsByType.getOrDefault(ServerCrashProcedure.class,
Collections.emptyList()).stream()
- .map(p -> (ServerCrashProcedure) p).map(p ->
p.getServerName()).collect(Collectors.toSet()),
+ .map(p -> (ServerCrashProcedure) p).collect(
+ Collectors.toMap(ServerCrashProcedure::getServerName,
Procedure::getSubmittedTime)),
Sets.union(rsListStorage.getAll(),
walManager.getLiveServersFromWALDir()),
walManager.getSplittingServersFromWALDir());
- // This manager will be started AFTER hbase:meta is confirmed on line.
+ // This manager will be started AFTER hbase:meta is confirmed on line..
// hbase.mirror.table.state.to.zookeeper is so hbase1 clients can connect.
They read table
// state from zookeeper while hbase2 reads it from hbase:meta. Disable if
no hbase1 clients.
this.tableStateManager =
@@ -1093,6 +1094,7 @@ public class HMaster extends HRegionServer implements
MasterServices {
final ColumnFamilyDescriptor replBarrierFamilyDesc =
metaDescriptor.getColumnFamily(HConstants.REPLICATION_BARRIER_FAMILY);
+ this.assignmentManager.initializationPostMetaOnline();
this.assignmentManager.joinCluster();
// The below depends on hbase:meta being online.
try {
@@ -1910,7 +1912,7 @@ public class HMaster extends HRegionServer implements
MasterServices {
// But if there are zero regions in transition, it can skip sleep to speed
up.
while (
!interrupted && EnvironmentEdgeManager.currentTime() <
nextBalanceStartTime
- && this.assignmentManager.getRegionStates().hasRegionsInTransition()
+ && this.assignmentManager.getRegionTransitScheduledCount() > 0
) {
try {
Thread.sleep(100);
@@ -1922,8 +1924,7 @@ public class HMaster extends HRegionServer implements
MasterServices {
// Throttling by max number regions in transition
while (
!interrupted && maxRegionsInTransition > 0
- &&
this.assignmentManager.getRegionStates().getRegionsInTransitionCount()
- >= maxRegionsInTransition
+ && this.assignmentManager.getRegionTransitScheduledCount() >=
maxRegionsInTransition
&& EnvironmentEdgeManager.currentTime() <= cutoffTime
) {
try {
@@ -2004,7 +2005,7 @@ public class HMaster extends HRegionServer implements
MasterServices {
synchronized (this.balancer) {
// Only allow one balance run at at time.
- if (this.assignmentManager.hasRegionsInTransition()) {
+ if (this.assignmentManager.getRegionTransitScheduledCount() > 0) {
List<RegionStateNode> regionsInTransition =
assignmentManager.getRegionsInTransition();
// if hbase:meta region is in transition, result of assignment cannot
be recorded
// ignore the force flag in that case
@@ -2019,7 +2020,8 @@ public class HMaster extends HRegionServer implements
MasterServices {
if (!request.isIgnoreRegionsInTransition() || metaInTransition) {
LOG.info("Not running balancer (ignoreRIT=false" + ", metaRIT=" +
metaInTransition
- + ") because " + regionsInTransition.size() + " region(s) in
transition: " + toPrint
+ + ") because " + assignmentManager.getRegionTransitScheduledCount()
+ + " region(s) are scheduled to transit " + toPrint
+ (truncated ? "(truncated list)" : ""));
return responseBuilder.build();
}
@@ -2152,7 +2154,7 @@ public class HMaster extends HRegionServer implements
MasterServices {
if (skipRegionManagementAction("region normalizer")) {
return false;
}
- if (assignmentManager.hasRegionsInTransition()) {
+ if (assignmentManager.getRegionTransitScheduledCount() > 0) {
return false;
}
@@ -2929,7 +2931,7 @@ public class HMaster extends HRegionServer implements
MasterServices {
case REGIONS_IN_TRANSITION: {
if (assignmentManager != null) {
builder.setRegionsInTransition(
-
assignmentManager.getRegionStates().getRegionsStateInTransition());
+ new
ArrayList<>(assignmentManager.getRegionsStateInTransition()));
}
break;
}
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
index 5ecf6a2f6e6..35995ff1ac8 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
@@ -24,6 +24,7 @@ import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.Collections;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -116,22 +117,26 @@ public class RegionServerTracker extends ZKListener {
* In this method, we will also construct the region server sets in {@link
ServerManager}. If a
* region server is dead between the crash of the previous master instance
and the start of the
* current master instance, we will schedule a SCP for it. This is done in
- * {@link ServerManager#findDeadServersAndProcess(Set, Set)}, we call it
here under the lock
+ * {@link ServerManager#findDeadServersAndProcess(Map, Set)}, we call it
here under the lock
* protection to prevent concurrency issues with server expiration operation.
- * @param deadServersFromPE the region servers which already have
SCP associated.
- * @param liveServersBeforeRestart the live region servers we recorded
before master restarts.
- * @param splittingServersFromWALDir Servers whose WALs are being actively
'split'.
+ * @param deadServersWithDeathTimeFromPE the region servers which already
have SCP associated,
+ * have deathTime as the value.
+ * @param liveServersBeforeRestart the live region servers we recorded
before master
+ * restarts.
+ * @param splittingServersFromWALDir Servers whose WALs are being
actively 'split'.
*/
- public void upgrade(Set<ServerName> deadServersFromPE, Set<ServerName>
liveServersBeforeRestart,
- Set<ServerName> splittingServersFromWALDir) throws KeeperException,
IOException {
+ public void upgrade(Map<ServerName, Long> deadServersWithDeathTimeFromPE,
+ Set<ServerName> liveServersBeforeRestart, Set<ServerName>
splittingServersFromWALDir)
+ throws KeeperException, IOException {
LOG.info(
"Upgrading RegionServerTracker to active master mode; {} have existing"
+ "ServerCrashProcedures, {} possibly 'live' servers, and {}
'splitting'.",
- deadServersFromPE.size(), liveServersBeforeRestart.size(),
splittingServersFromWALDir.size());
- // deadServersFromPE is made from a list of outstanding
ServerCrashProcedures.
+ deadServersWithDeathTimeFromPE.size(), liveServersBeforeRestart.size(),
+ splittingServersFromWALDir.size());
+ // deadServersWithDeathTimeFromPE is made from a list of outstanding
ServerCrashProcedures.
// splittingServersFromWALDir are being actively split -- the directory in
the FS ends in
// '-SPLITTING'. Each splitting server should have a corresponding SCP.
Log if not.
- splittingServersFromWALDir.stream().filter(s ->
!deadServersFromPE.contains(s))
+ splittingServersFromWALDir.stream().filter(s ->
!deadServersWithDeathTimeFromPE.containsKey(s))
.forEach(s -> LOG.error("{} has no matching ServerCrashProcedure", s));
// create ServerNode for all possible live servers from wal directory
liveServersBeforeRestart
@@ -148,7 +153,8 @@ public class RegionServerTracker extends ZKListener {
: ServerMetricsBuilder.of(serverName);
serverManager.checkAndRecordNewServer(serverName, serverMetrics);
}
- serverManager.findDeadServersAndProcess(deadServersFromPE,
liveServersBeforeRestart);
+ serverManager.findDeadServersAndProcess(deadServersWithDeathTimeFromPE,
+ liveServersBeforeRestart);
active = true;
}
}
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index e0e8fd035e6..8d263e6b1bf 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -319,12 +319,13 @@ public class ServerManager {
* <p/>
* Must be called inside the initialization method of {@code
RegionServerTracker} to avoid
* concurrency issue.
- * @param deadServersFromPE the region servers which already have a SCP
associated.
- * @param liveServersFromWALDir the live region servers from wal directory.
+ * @param deadServersWithDeathTimeFromPE the region servers which already
have an SCP associated,
+ * have time of death as value.
+ * @param liveServersFromWALDir the live region servers from wal
directory.
*/
- void findDeadServersAndProcess(Set<ServerName> deadServersFromPE,
+ void findDeadServersAndProcess(Map<ServerName, Long>
deadServersWithDeathTimeFromPE,
Set<ServerName> liveServersFromWALDir) {
- deadServersFromPE.forEach(deadservers::putIfAbsent);
+ deadServersWithDeathTimeFromPE.forEach(deadservers::putIfAbsent);
liveServersFromWALDir.stream().filter(sn -> !onlineServers.containsKey(sn))
.forEach(this::expireServer);
}
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 6c6aa202abb..b811791a8f4 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -22,11 +22,14 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
+import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
@@ -202,6 +205,9 @@ public class AssignmentManager {
private Thread assignThread;
+ private final RegionInTransitionTracker regionInTransitionTracker =
+ new RegionInTransitionTracker();
+
public AssignmentManager(MasterServices master, MasterRegion masterRegion) {
this(master, masterRegion, new RegionStateStore(master, masterRegion));
}
@@ -294,6 +300,8 @@ public class AssignmentManager {
regionNode.setLastHost(lastHost);
regionNode.setRegionLocation(regionLocation);
regionNode.setOpenSeqNum(openSeqNum);
+
regionInTransitionTracker.handleRegionStateNodeOperation(regionNode);
+
if (regionNode.getProcedure() != null) {
regionNode.getProcedure().stateLoaded(this, regionNode);
}
@@ -339,11 +347,17 @@ public class AssignmentManager {
return;
}
}
- LOG.info("Attach {} to {} to restore RIT", proc, regionNode);
+ LOG.info("Attach {} to {}", proc, regionNode);
regionNode.setProcedure(proc);
});
}
+ public void initializationPostMetaOnline() {
+ // now that we are sure that meta is online, we can set TableStateManager in
+ // regionInTransitionTracker
+
regionInTransitionTracker.setTableStateManager(master.getTableStateManager());
+ }
+
public void stop() {
if (!running.compareAndSet(true, false)) {
return;
@@ -368,6 +382,7 @@ public class AssignmentManager {
// Stop the RegionStateStore
regionStates.clear();
+ regionInTransitionTracker.stop();
// Update meta events (for testing)
if (hasProcExecutor) {
@@ -1025,7 +1040,7 @@ public class AssignmentManager {
regionNode.lock();
try {
if (shouldSubmit.apply(regionNode)) {
- if (regionNode.isInTransition()) {
+ if (regionNode.isTransitionScheduled()) {
logRIT.accept(regionNode);
inTransitionCount++;
continue;
@@ -1616,10 +1631,8 @@ public class AssignmentManager {
}
protected void update(final AssignmentManager am) {
- final RegionStates regionStates = am.getRegionStates();
this.statTimestamp = EnvironmentEdgeManager.currentTime();
- update(regionStates.getRegionsStateInTransition(), statTimestamp);
- update(regionStates.getRegionFailedOpen(), statTimestamp);
+ update(am.getRegionsStateInTransition(), statTimestamp);
if (LOG.isDebugEnabled() && ritsOverThreshold != null &&
!ritsOverThreshold.isEmpty()) {
LOG.debug("RITs over threshold: {}",
@@ -1781,6 +1794,20 @@ public class AssignmentManager {
if (regionNode.getProcedure() != null) {
regionNode.getProcedure().stateLoaded(AssignmentManager.this,
regionNode);
}
+ // add regions to RIT while visiting the meta
+ regionInTransitionTracker.handleRegionStateNodeOperation(regionNode);
+ // If region location of region belongs to a dead server mark the region
crashed
+ if (
+ regionNode.getRegionLocation() != null
+ &&
master.getServerManager().isServerDead(regionNode.getRegionLocation())
+ ) {
+ long timeOfCrash = master.getServerManager().getDeadServers()
+ .getDeathTimestamp(regionNode.getRegionLocation());
+ if (timeOfCrash != 0) {
+ regionNode.crashed(timeOfCrash);
+ }
+ regionInTransitionTracker.regionCrashed(regionNode);
+ }
}
};
@@ -1946,15 +1973,52 @@ public class AssignmentManager {
return new Pair<Integer, Integer>(ritCount, states.size());
}
+ // This comparator sorts the RegionStates by time stamp then Region name.
+ // Comparing by timestamp alone can lead us to discard different
RegionStates that happen
+ // to share a timestamp.
+ private final static class RegionStateStampComparator implements
Comparator<RegionState> {
+ @Override
+ public int compare(final RegionState l, final RegionState r) {
+ int stampCmp = Long.compare(l.getStamp(), r.getStamp());
+ return stampCmp != 0 ? stampCmp :
RegionInfo.COMPARATOR.compare(l.getRegion(), r.getRegion());
+ }
+ }
+
+ public final static RegionStateStampComparator REGION_STATE_STAMP_COMPARATOR
=
+ new RegionStateStampComparator();
+
//
============================================================================================
// TODO: Region State In Transition
//
============================================================================================
public boolean hasRegionsInTransition() {
- return regionStates.hasRegionsInTransition();
+ return regionInTransitionTracker.hasRegionsInTransition();
}
public List<RegionStateNode> getRegionsInTransition() {
- return regionStates.getRegionsInTransition();
+ return regionInTransitionTracker.getRegionsInTransition();
+ }
+
+ public boolean isRegionInTransition(final RegionInfo regionInfo) {
+ return regionInTransitionTracker.isRegionInTransition(regionInfo);
+ }
+
+ public int getRegionTransitScheduledCount() {
+ return regionStates.getRegionTransitScheduledCount();
+ }
+
+ /**
+ * Get the number of regions in transition.
+ */
+ public int getRegionsInTransitionCount() {
+ return regionInTransitionTracker.getRegionsInTransition().size();
+ }
+
+ public SortedSet<RegionState> getRegionsStateInTransition() {
+ final SortedSet<RegionState> rit = new
TreeSet<RegionState>(REGION_STATE_STAMP_COMPARATOR);
+ for (RegionStateNode node : getRegionsInTransition()) {
+ rit.add(node.toRegionState());
+ }
+ return rit;
}
public List<RegionInfo> getAssignedRegions() {
@@ -2021,6 +2085,8 @@ public class AssignmentManager {
if (!succ) {
// revert
regionNode.setState(state);
+ } else {
+ regionInTransitionTracker.handleRegionStateNodeOperation(regionNode);
}
}
}
@@ -2054,6 +2120,8 @@ public class AssignmentManager {
// revert
regionNode.setState(state);
regionNode.setRegionLocation(regionLocation);
+ } else {
+ regionInTransitionTracker.handleRegionStateNodeOperation(regionNode);
}
}
}
@@ -2116,6 +2184,8 @@ public class AssignmentManager {
// revert
regionNode.setState(state);
regionNode.setRegionLocation(regionLocation);
+ } else {
+ regionInTransitionTracker.handleRegionStateNodeOperation(regionNode);
}
}
if (regionLocation != null) {
@@ -2134,12 +2204,27 @@ public class AssignmentManager {
// on table that contains state.
setMetaAssigned(regionInfo, true);
}
+ regionInTransitionTracker.handleRegionStateNodeOperation(regionNode);
}
//
============================================================================================
// The above methods can only be called in TransitRegionStateProcedure(and
related procedures)
//
============================================================================================
+ // As soon as a server has crashed, the regions hosted on it are unavailable;
this method helps to
+ // track those unavailable regions. This method can only be called from
ServerCrashProcedure.
+ public void markRegionsAsCrashed(List<RegionInfo> regionsOnCrashedServer,
+ ServerCrashProcedure scp) {
+ ServerName crashedServerName = scp.getServerName();
+ for (RegionInfo regionInfo : regionsOnCrashedServer) {
+ RegionStateNode node =
regionStates.getOrCreateRegionStateNode(regionInfo);
+ if (node.getRegionLocation() == crashedServerName) {
+ node.crashed(scp.getSubmittedTime());
+ regionInTransitionTracker.regionCrashed(node);
+ }
+ }
+ }
+
public void markRegionAsSplit(final RegionInfo parent, final ServerName
serverName,
final RegionInfo daughterA, final RegionInfo daughterB) throws IOException
{
// Update hbase:meta. Parent will be marked offline and split up in
hbase:meta.
@@ -2163,6 +2248,9 @@ public class AssignmentManager {
// it is a split parent. And usually only one of them can match, as after
restart, the region
// state will be changed from SPLIT to CLOSED.
regionStateStore.splitRegion(parent, daughterA, daughterB, serverName);
+ regionInTransitionTracker.handleRegionStateNodeOperation(node);
+ regionInTransitionTracker.handleRegionStateNodeOperation(nodeA);
+ regionInTransitionTracker.handleRegionStateNodeOperation(nodeB);
if (shouldAssignFavoredNodes(parent)) {
List<ServerName> onlineServers =
this.master.getServerManager().getOnlineServersList();
((FavoredNodesPromoter)
getBalancer()).generateFavoredNodesForDaughter(onlineServers, parent,
@@ -2185,9 +2273,10 @@ public class AssignmentManager {
node.setState(State.MERGED);
for (RegionInfo ri : mergeParents) {
regionStates.deleteRegion(ri);
-
+ regionInTransitionTracker.handleRegionDelete(ri);
}
regionStateStore.mergeRegions(child, mergeParents, serverName);
+ regionInTransitionTracker.handleRegionStateNodeOperation(node);
if (shouldAssignFavoredNodes(child)) {
((FavoredNodesPromoter)
getBalancer()).generateFavoredNodesForMergedRegion(child,
mergeParents);
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManagerUtil.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManagerUtil.java
index 4ad69203ad8..7a11c5961b4 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManagerUtil.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManagerUtil.java
@@ -164,7 +164,7 @@ final class AssignmentManagerUtil {
regionNode.lock();
try {
if (ignoreIfInTransition) {
- if (regionNode.isInTransition()) {
+ if (regionNode.isTransitionScheduled()) {
return null;
}
} else {
@@ -172,7 +172,7 @@ final class AssignmentManagerUtil {
// created, or has been successfully closed so should not be on
any servers, so SCP
// will
// not process it either.
- assert !regionNode.isInTransition();
+ assert !regionNode.isTransitionScheduled();
}
regionNode.setProcedure(proc);
} finally {
@@ -194,7 +194,7 @@ final class AssignmentManagerUtil {
// apply ignoreRITs to replica regions as well.
if (
!ignoreIfInTransition ||
!env.getAssignmentManager().getRegionStates()
- .getOrCreateRegionStateNode(ri).isInTransition()
+ .getOrCreateRegionStateNode(ri).isTransitionScheduled()
) {
replicaRegionInfos.add(ri);
}
@@ -242,7 +242,7 @@ final class AssignmentManagerUtil {
for (RegionInfo region : regionsAndReplicas) {
if (
env.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(region)
- .isInTransition()
+ .isTransitionScheduled()
) {
return null;
}
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionInTransitionTracker.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionInTransitionTracker.java
new file mode 100644
index 00000000000..e3c52e5bdc6
--- /dev/null
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionInTransitionTracker.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.assignment;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.ConcurrentSkipListMap;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.TableState;
+import org.apache.hadoop.hbase.master.RegionState;
+import org.apache.hadoop.hbase.master.TableStateManager;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Tracks regions that are currently in transition (RIT) - those not yet in their terminal state.
+ * <p>
+ * Only default replicas are tracked. For a table in ENABLED or ENABLING state the terminal region
+ * state is OPEN; for any other table state it is OFFLINE or CLOSED. Regions that are SPLIT or
+ * MERGED are never kept in the RIT list.
+ */
+@InterfaceAudience.Private
+public class RegionInTransitionTracker {
+  private static final Logger LOG = LoggerFactory.getLogger(RegionInTransitionTracker.class);
+
+  /** Terminal region states used when the table is not ENABLED/ENABLING. */
+  private static final List<RegionState.State> DISABLE_TABLE_REGION_STATE =
+    Arrays.asList(RegionState.State.OFFLINE, RegionState.State.CLOSED);
+
+  /** Terminal region states used when the table is ENABLED/ENABLING. */
+  private static final List<RegionState.State> ENABLE_TABLE_REGION_STATE =
+    Collections.singletonList(RegionState.State.OPEN);
+
+  /** The RIT list itself, keyed by region and ordered by {@link RegionInfo#COMPARATOR}. */
+  private final ConcurrentSkipListMap<RegionInfo, RegionStateNode> regionInTransition =
+    new ConcurrentSkipListMap<>(RegionInfo.COMPARATOR);
+
+  // Stays null until setTableStateManager is called. It is volatile so that threads reporting
+  // region state changes observe the manager once another thread has published it.
+  private volatile TableStateManager tableStateManager;
+
+  /**
+   * Returns whether the given region is currently present in the RIT list.
+   * @param regionInfo the region to look up
+   */
+  public boolean isRegionInTransition(final RegionInfo regionInfo) {
+    return regionInTransition.containsKey(regionInfo);
+  }
+
+  /**
+   * Handles a region whose hosting RegionServer has crashed. When a RegionServer fails, all
+   * regions it was hosting are added to the RIT list since they need to be reassigned to other
+   * servers. Non-default replicas are ignored.
+   * @param regionStateNode the node of the region that was hosted on the crashed server
+   */
+  public void regionCrashed(RegionStateNode regionStateNode) {
+    if (regionStateNode.getRegionInfo().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
+      return;
+    }
+
+    if (addRegionInTransition(regionStateNode)) {
+      LOG.debug("{} added to RIT list because hosting region server is crashed ",
+        regionStateNode.getRegionInfo().getEncodedName());
+    }
+  }
+
+  /**
+   * Processes a region state change and updates the RIT tracking accordingly. This is the core
+   * method that determines whether a region should be added to or removed from the RIT list based
+   * on its current state and the table's enabled/disabled status. This method should be called
+   * whenever a region state change gets stored to hbase:meta.
+   * <p>
+   * Note: Only default replicas (replica ID 0) are tracked. Read replicas are ignored.
+   * @param regionStateNode the region state node with the current state information
+   */
+  public void handleRegionStateNodeOperation(RegionStateNode regionStateNode) {
+    // only consider default replica for availability
+    if (regionStateNode.getRegionInfo().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
+      return;
+    }
+
+    RegionState.State currentState = regionStateNode.getState();
+    boolean tableEnabled = isTableEnabled(regionStateNode.getTable());
+    List<RegionState.State> terminalStates =
+      tableEnabled ? ENABLE_TABLE_REGION_STATE : DISABLE_TABLE_REGION_STATE;
+
+    // if region is merged or split it should not be in RIT list
+    if (
+      currentState == RegionState.State.SPLIT || currentState == RegionState.State.MERGED
+        || regionStateNode.getRegionInfo().isSplit()
+    ) {
+      if (removeRegionInTransition(regionStateNode.getRegionInfo())) {
+        LOG.debug("Removed {} from RIT list as it is split or merged",
+          regionStateNode.getRegionInfo().getEncodedName());
+      }
+    } else if (!terminalStates.contains(currentState)) {
+      if (addRegionInTransition(regionStateNode)) {
+        LOG.debug("{} added to RIT list because it is in-between state, region state : {} ",
+          regionStateNode.getRegionInfo().getEncodedName(), currentState);
+      }
+    } else {
+      if (removeRegionInTransition(regionStateNode.getRegionInfo())) {
+        LOG.debug("Removed {} from RIT list as reached to terminal state {}",
+          regionStateNode.getRegionInfo().getEncodedName(), currentState);
+      }
+    }
+  }
+
+  /**
+   * Returns true when the table is in ENABLED or ENABLING state. While no TableStateManager has
+   * been set yet, the table is treated as enabled.
+   */
+  private boolean isTableEnabled(TableName tableName) {
+    // Read the field once so the null check and the call use the same reference.
+    TableStateManager tsm = tableStateManager;
+    if (tsm != null) {
+      return tsm.isTableState(tableName, TableState.State.ENABLED, TableState.State.ENABLING);
+    }
+    // AssignmentManager calls setTableStateManager once hbase:meta is confirmed online, if it is
+    // still null it means confirmation is still pending. One should not access TableStateManager
+    // till that time.
+    assert TableName.isMetaTableName(tableName);
+    return true;
+  }
+
+  /**
+   * Handles the deletion of a region by removing it from RIT tracking. This is called when a
+   * region is permanently removed from the cluster, typically after a successful merge operation
+   * where the parent regions are cleaned up. During table deletion, the table should already be
+   * disabled and all its regions should already be OFFLINE.
+   * @param regionInfo the region being deleted
+   */
+  public void handleRegionDelete(RegionInfo regionInfo) {
+    removeRegionInTransition(regionInfo);
+  }
+
+  /** Adds the node to the RIT list; returns true only if the region was not already present. */
+  private boolean addRegionInTransition(final RegionStateNode regionStateNode) {
+    return regionInTransition.putIfAbsent(regionStateNode.getRegionInfo(), regionStateNode) == null;
+  }
+
+  /** Removes the region from the RIT list; returns true only if it was present. */
+  private boolean removeRegionInTransition(final RegionInfo regionInfo) {
+    return regionInTransition.remove(regionInfo) != null;
+  }
+
+  /** Clears the RIT list. */
+  public void stop() {
+    regionInTransition.clear();
+  }
+
+  /** Returns whether at least one region is currently in the RIT list. */
+  public boolean hasRegionsInTransition() {
+    return !regionInTransition.isEmpty();
+  }
+
+  /** Returns a new list holding the nodes that are currently in the RIT list. */
+  public List<RegionStateNode> getRegionsInTransition() {
+    return new ArrayList<>(regionInTransition.values());
+  }
+
+  /**
+   * Sets the manager used to look up table state. Until this is called, every table is treated as
+   * enabled.
+   * @param tableStateManager the table state manager to consult
+   */
+  public void setTableStateManager(TableStateManager tableStateManager) {
+    this.tableStateManager = tableStateManager;
+  }
+}
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java
index c00f8c367ad..26d16cd9920 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java
@@ -18,7 +18,7 @@
package org.apache.hadoop.hbase.master.assignment;
import java.util.Arrays;
-import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.hadoop.hbase.HConstants;
@@ -68,6 +68,9 @@ import org.slf4j.LoggerFactory;
public class RegionStateNode implements Comparable<RegionStateNode> {
private static final Logger LOG =
LoggerFactory.getLogger(RegionStateNode.class);
+ // Shared count of all active TRSPs in the master. The counter is created by RegionStates and
+ // passed in so that every RegionStateNode updates the same instance.
+ private final AtomicInteger activeTransitProcedureCount;
private static final class AssignmentProcedureEvent extends
ProcedureEvent<RegionInfo> {
public AssignmentProcedureEvent(final RegionInfo regionInfo) {
@@ -78,7 +81,6 @@ public class RegionStateNode implements
Comparable<RegionStateNode> {
final Lock lock = new ReentrantLock();
private final RegionInfo regionInfo;
private final ProcedureEvent<?> event;
- private final ConcurrentMap<RegionInfo, RegionStateNode> ritMap;
// volatile only for getLastUpdate and test usage, the upper layer should
sync on the
// RegionStateNode before accessing usually.
@@ -96,16 +98,16 @@ public class RegionStateNode implements
Comparable<RegionStateNode> {
/**
* Updated whenever a call to {@link #setRegionLocation(ServerName)} or
- * {@link #setState(RegionState.State, RegionState.State...)}.
+ * {@link #setState(RegionState.State, RegionState.State...)} or {@link
#crashed(long)}.
*/
private volatile long lastUpdate = 0;
private volatile long openSeqNum = HConstants.NO_SEQNUM;
- RegionStateNode(RegionInfo regionInfo, ConcurrentMap<RegionInfo,
RegionStateNode> ritMap) {
+ RegionStateNode(RegionInfo regionInfo, AtomicInteger
activeTransitProcedureCount) {
this.regionInfo = regionInfo;
this.event = new AssignmentProcedureEvent(regionInfo);
- this.ritMap = ritMap;
+ this.activeTransitProcedureCount = activeTransitProcedureCount;
}
/**
@@ -160,7 +162,7 @@ public class RegionStateNode implements
Comparable<RegionStateNode> {
return isInState(State.FAILED_OPEN) && getProcedure() != null;
}
- public boolean isInTransition() {
+ public boolean isTransitionScheduled() {
return getProcedure() != null;
}
@@ -189,6 +191,10 @@ public class RegionStateNode implements
Comparable<RegionStateNode> {
this.lastHost = serverName;
}
+ /**
+ * Records the given crash time as this node's last update time.
+ * @param crashTime the value to store in {@code lastUpdate}
+ */
+ public void crashed(long crashTime) {
+ this.lastUpdate = crashTime;
+ }
+
public void setOpenSeqNum(final long seqId) {
this.openSeqNum = seqId;
}
@@ -206,14 +212,14 @@ public class RegionStateNode implements
Comparable<RegionStateNode> {
public TransitRegionStateProcedure setProcedure(TransitRegionStateProcedure
proc) {
assert this.procedure == null;
this.procedure = proc;
- ritMap.put(regionInfo, this);
+ activeTransitProcedureCount.incrementAndGet();
return proc;
}
public void unsetProcedure(TransitRegionStateProcedure proc) {
assert this.procedure == proc;
+ activeTransitProcedureCount.decrementAndGet();
this.procedure = null;
- ritMap.remove(regionInfo, this);
}
public TransitRegionStateProcedure getProcedure() {
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
index 26cbdb9288f..e8307f2951a 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
@@ -20,13 +20,10 @@ package org.apache.hadoop.hbase.master.assignment;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
-import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.SortedSet;
-import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicInteger;
@@ -54,22 +51,10 @@ import org.slf4j.LoggerFactory;
public class RegionStates {
private static final Logger LOG =
LoggerFactory.getLogger(RegionStates.class);
- // This comparator sorts the RegionStates by time stamp then Region name.
- // Comparing by timestamp alone can lead us to discard different
RegionStates that happen
- // to share a timestamp.
- private static class RegionStateStampComparator implements
Comparator<RegionState> {
- @Override
- public int compare(final RegionState l, final RegionState r) {
- int stampCmp = Long.compare(l.getStamp(), r.getStamp());
- return stampCmp != 0 ? stampCmp :
RegionInfo.COMPARATOR.compare(l.getRegion(), r.getRegion());
- }
- }
-
- public final static RegionStateStampComparator REGION_STATE_STAMP_COMPARATOR
=
- new RegionStateStampComparator();
-
private final Object regionsMapLock = new Object();
+ private final AtomicInteger activeTransitProcedureCount = new
AtomicInteger(0);
+
// TODO: Replace the ConcurrentSkipListMaps
/**
* A Map from {@link RegionInfo#getRegionName()} to {@link RegionStateNode}
@@ -84,9 +69,6 @@ public class RegionStates {
private final ConcurrentSkipListMap<String, RegionStateNode>
encodedRegionsMap =
new ConcurrentSkipListMap<>();
- private final ConcurrentSkipListMap<RegionInfo, RegionStateNode>
regionInTransition =
- new ConcurrentSkipListMap<>(RegionInfo.COMPARATOR);
-
/**
* Regions marked as offline on a read of hbase:meta. Unused or at least,
once offlined, regions
* have no means of coming on line again. TODO.
@@ -109,23 +91,17 @@ public class RegionStates {
public void clear() {
regionsMap.clear();
encodedRegionsMap.clear();
- regionInTransition.clear();
regionOffline.clear();
serverMap.clear();
}
- public boolean isRegionInRegionStates(final RegionInfo hri) {
- return (regionsMap.containsKey(hri.getRegionName()) ||
regionInTransition.containsKey(hri)
- || regionOffline.containsKey(hri));
- }
-
// ==========================================================================
// RegionStateNode helpers
// ==========================================================================
RegionStateNode createRegionStateNode(RegionInfo regionInfo) {
synchronized (regionsMapLock) {
RegionStateNode node =
regionsMap.computeIfAbsent(regionInfo.getRegionName(),
- key -> new RegionStateNode(regionInfo, regionInTransition));
+ key -> new RegionStateNode(regionInfo, activeTransitProcedureCount));
if (encodedRegionsMap.get(regionInfo.getEncodedName()) != node) {
encodedRegionsMap.put(regionInfo.getEncodedName(), node);
@@ -157,12 +133,6 @@ public class RegionStates {
regionsMap.remove(regionInfo.getRegionName());
encodedRegionsMap.remove(regionInfo.getEncodedName());
}
- // See HBASE-20860
- // After master restarts, merged regions' RIT state may not be cleaned,
- // making sure they are cleaned here
- if (regionInTransition.containsKey(regionInfo)) {
- regionInTransition.remove(regionInfo);
- }
// Remove from the offline regions map too if there.
if (this.regionOffline.containsKey(regionInfo)) {
if (LOG.isTraceEnabled()) LOG.trace("Removing from regionOffline Map: "
+ regionInfo);
@@ -459,7 +429,7 @@ public class RegionStates {
public List<RegionInfo> getAssignedRegions() {
final List<RegionInfo> result = new ArrayList<RegionInfo>();
for (RegionStateNode node : regionsMap.values()) {
- if (!node.isInTransition()) {
+ if (!node.isTransitionScheduled()) {
result.add(node.getRegionInfo());
}
}
@@ -618,59 +588,6 @@ public class RegionStates {
TableState.State.DISABLING);
}
- // ==========================================================================
- // Region in transition helpers
- // ==========================================================================
- public boolean hasRegionsInTransition() {
- return !regionInTransition.isEmpty();
- }
-
- public boolean isRegionInTransition(final RegionInfo regionInfo) {
- final RegionStateNode node = regionInTransition.get(regionInfo);
- return node != null ? node.isInTransition() : false;
- }
-
- public RegionState getRegionTransitionState(RegionInfo hri) {
- RegionStateNode node = regionInTransition.get(hri);
- if (node == null) {
- return null;
- }
-
- node.lock();
- try {
- return node.isInTransition() ? node.toRegionState() : null;
- } finally {
- node.unlock();
- }
- }
-
- public List<RegionStateNode> getRegionsInTransition() {
- return new ArrayList<RegionStateNode>(regionInTransition.values());
- }
-
- /**
- * Get the number of regions in transition.
- */
- public int getRegionsInTransitionCount() {
- return regionInTransition.size();
- }
-
- public List<RegionState> getRegionsStateInTransition() {
- final List<RegionState> rit = new
ArrayList<RegionState>(regionInTransition.size());
- for (RegionStateNode node : regionInTransition.values()) {
- rit.add(node.toRegionState());
- }
- return rit;
- }
-
- public SortedSet<RegionState> getRegionsInTransitionOrderedByTimestamp() {
- final SortedSet<RegionState> rit = new
TreeSet<RegionState>(REGION_STATE_STAMP_COMPARATOR);
- for (RegionStateNode node : regionInTransition.values()) {
- rit.add(node.toRegionState());
- }
- return rit;
- }
-
// ==========================================================================
// Region offline helpers
// ==========================================================================
@@ -680,6 +597,10 @@ public class RegionStates {
regionOffline.put(regionNode.getRegionInfo(), regionNode);
}
+ /**
+ * Returns the current value of the counter shared with every RegionStateNode, which is
+ * incremented in RegionStateNode#setProcedure and decremented in
+ * RegionStateNode#unsetProcedure, i.e. the number of regions with a TRSP attached.
+ */
+ public int getRegionTransitScheduledCount() {
+ return activeTransitProcedureCount.get();
+ }
+
// ==========================================================================
// Region FAIL_OPEN helpers
// ==========================================================================
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java
index 122c901fd73..892391c57d1 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java
@@ -252,7 +252,7 @@ public final class ProcedureSyncWait {
new ProcedureSyncWait.Predicate<Boolean>() {
@Override
public Boolean evaluate() throws IOException {
- return !states.isRegionInTransition(region);
+ // The node can be null once the region has been removed from RegionStates; treat that as
+ // "no transition scheduled" rather than failing with a NullPointerException. The type is
+ // fully qualified because this file's import block is not part of this hunk.
+ org.apache.hadoop.hbase.master.assignment.RegionStateNode regionNode =
+ states.getRegionStateNode(region);
+ return regionNode == null || !regionNode.isTransitionScheduled();
}
});
}
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
index 4efb1768b0c..1a3ca14db92 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
@@ -96,7 +96,7 @@ public class ReopenTableRegionsProcedure
// If the region node is null, then at least in the next round we can
remove this region to make
// progress. And the second condition is a normal one, if there are no
TRSP with it then we can
// schedule one to make progress.
- return regionNode == null || !regionNode.isInTransition();
+ return regionNode == null || !regionNode.isTransitionScheduled();
}
@Override
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
index 69ebd0567de..0a79879eff3 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
@@ -23,6 +23,7 @@ import static
org.apache.hadoop.hbase.HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.ServerName;
@@ -162,6 +163,8 @@ public class ServerCrashProcedure extends
LOG.info("Start " + this);
// If carrying meta, process it first. Else, get list of regions on
crashed server.
if (this.carryingMeta) {
+ env.getAssignmentManager().markRegionsAsCrashed(
+
Collections.singletonList(RegionInfoBuilder.FIRST_META_REGIONINFO), this);
setNextState(ServerCrashState.SERVER_CRASH_SPLIT_META_LOGS);
} else {
setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
@@ -201,6 +204,7 @@ public class ServerCrashProcedure extends
if (LOG.isTraceEnabled()) {
this.regionsOnCrashedServer.stream().forEach(ri ->
LOG.trace(ri.getShortNameToLog()));
}
+
env.getAssignmentManager().markRegionsAsCrashed(regionsOnCrashedServer, this);
}
if (!this.shouldSplitWal) {
setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
index 48a7615b2a4..4ebfa3f6cfe 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
@@ -4035,9 +4035,8 @@ public class HBaseTestingUtility extends
HBaseZKTestingUtility {
return new ExplainingPredicate<IOException>() {
@Override
public String explainFailure() throws IOException {
- final RegionStates regionStates =
-
getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
- return "found in transition: " +
regionStates.getRegionsInTransition().toString();
+ final AssignmentManager am =
getMiniHBaseCluster().getMaster().getAssignmentManager();
+ return "found in transition: " +
am.getRegionsInTransition().toString();
}
@Override
@@ -4051,6 +4050,34 @@ public class HBaseTestingUtility extends
HBaseZKTestingUtility {
};
}
+  /**
+   * Returns a {@link Predicate} that holds when no region transition procedure (TRSP) is
+   * scheduled in the master. It evaluates to false while the master or its AssignmentManager is
+   * not available.
+   */
+  public ExplainingPredicate<IOException> predicateNoRegionTransitScheduled() {
+    return new ExplainingPredicate<IOException>() {
+      @Override
+      public String explainFailure() throws IOException {
+        // Mirror the null checks in evaluate() so that a missing master is reported in the
+        // failure message instead of surfacing as a NullPointerException.
+        HMaster master = getMiniHBaseCluster().getMaster();
+        if (master == null) {
+          return "No master is available";
+        }
+        AssignmentManager am = master.getAssignmentManager();
+        if (am == null) {
+          return "AssignmentManager is not available";
+        }
+        return "Number of procedure scheduled for region transit: "
+          + am.getRegionTransitScheduledCount();
+      }
+
+      @Override
+      public boolean evaluate() throws IOException {
+        HMaster master = getMiniHBaseCluster().getMaster();
+        if (master == null) {
+          return false;
+        }
+        AssignmentManager am = master.getAssignmentManager();
+        if (am == null) {
+          return false;
+        }
+        return am.getRegionTransitScheduledCount() == 0;
+      }
+    };
+  }
+
/**
* Returns a {@link Predicate} for checking that table is enabled
*/
@@ -4135,6 +4162,21 @@ public class HBaseTestingUtility extends
HBaseZKTestingUtility {
waitUntilNoRegionsInTransition(15 * 60000);
}
+ /**
+ * Wait until no region transition procedure (TRSP) is scheduled in the master, as reported by
+ * {@link #predicateNoRegionTransitScheduled()}.
+ * @param timeout How long to wait; passed through to waitFor.
+ */
+ public void waitUntilNoRegionTransitScheduled(final long timeout) throws
IOException {
+ waitFor(timeout, predicateNoRegionTransitScheduled());
+ }
+
+ /**
+ * Wait until no TRSP is present, using a default timeout of 15 * 60000 (15 minutes if the
+ * unit is milliseconds - TODO confirm against waitFor).
+ */
+ public void waitUntilNoRegionTransitScheduled() throws IOException {
+ waitUntilNoRegionTransitScheduled(15 * 60000);
+ }
+
/**
* Wait until labels is ready in VisibilityLabelsCache.
*/
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi.java
index 2455590ec25..d3b4f51e1d2 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi.java
@@ -94,12 +94,12 @@ public class TestAsyncRegionAdminApi extends
TestAsyncAdminBase {
// Expected
assertThat(e.getCause(), instanceOf(DoNotRetryRegionException.class));
}
- assertFalse(am.getRegionStates().getRegionStateNode(hri).isInTransition());
+
assertFalse(am.getRegionStates().getRegionStateNode(hri).isTransitionScheduled());
assertTrue(regionStates.getRegionState(hri).isOpened());
// unassign region
admin.unassign(hri.getRegionName(), true).get();
- assertFalse(am.getRegionStates().getRegionStateNode(hri).isInTransition());
+
assertFalse(am.getRegionStates().getRegionStateNode(hri).isTransitionScheduled());
assertTrue(regionStates.getRegionState(hri).isClosed());
}
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSplitOrMergeStatus.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSplitOrMergeStatus.java
index fa838f05f39..6600784c401 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSplitOrMergeStatus.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSplitOrMergeStatus.java
@@ -206,8 +206,8 @@ public class TestSplitOrMergeStatus {
ProcedureTestingUtility.waitProcedure(procExec, procId2);
AssignmentTestingUtil.killRs(TEST_UTIL, serverName);
Threads.sleepWithoutInterrupt(5000);
- boolean hasRegionsInTransition =
TEST_UTIL.getMiniHBaseCluster().getMaster()
- .getAssignmentManager().getRegionStates().hasRegionsInTransition();
+ boolean hasRegionsInTransition =
+
TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().hasRegionsInTransition();
assertEquals(false, hasRegionsInTransition);
}
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerMetrics.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerMetrics.java
index add0c4ed2a5..d1c8e5ddf6b 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerMetrics.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerMetrics.java
@@ -154,7 +154,7 @@ public class TestAssignmentManagerMetrics {
// Sleep 5 seconds, wait for doMetrics chore catching up
// the rit count consists of rit and failed opens. see
RegionInTransitionStat#update
// Waiting for the completion of rit makes the assert stable.
- TEST_UTIL.waitUntilNoRegionsInTransition();
+ TEST_UTIL.waitUntilNoRegionTransitScheduled();
Thread.sleep(MSG_INTERVAL * 5);
METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_NAME, 1,
amSource);
METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_OVER_THRESHOLD_NAME,
1,
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
index 046c72050ba..a58ea0b159e 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hbase.master;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;
import java.util.List;
@@ -95,7 +96,7 @@ public class TestDeadServer {
assertTrue(ds.isDeadServer(deadServer));
Set<ServerName> deadServerNames = ds.copyServerNames();
for (ServerName eachDeadServer : deadServerNames) {
- Assert.assertNotNull(ds.getTimeOfDeath(eachDeadServer));
+ assertNotEquals(0, ds.getDeathTimestamp(eachDeadServer));
}
final ServerName deadServerHostComingAlive =
ServerName.valueOf("127.0.0.1", 9090, 223341L);
assertTrue(ds.cleanPreviousInstance(deadServerHostComingAlive));
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalanceThrottling.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalanceThrottling.java
index ea527bc6704..f82385e8b37 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalanceThrottling.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalanceThrottling.java
@@ -126,8 +126,8 @@ public class TestMasterBalanceThrottling {
@Override
public void run() {
while (!stop.get()) {
- maxCount.set(Math.max(maxCount.get(),
-
master.getAssignmentManager().getRegionStates().getRegionsInTransitionCount()));
+ maxCount.set(
+ Math.max(maxCount.get(),
master.getAssignmentManager().getRegionsInTransitionCount()));
try {
Thread.sleep(10);
} catch (InterruptedException e) {
@@ -142,7 +142,7 @@ public class TestMasterBalanceThrottling {
}
private void unbalance(HMaster master, TableName tableName) throws Exception
{
- while
(master.getAssignmentManager().getRegionStates().getRegionsInTransitionCount()
> 0) {
+ while (master.getAssignmentManager().getRegionsInTransitionCount() > 0) {
Thread.sleep(100);
}
HRegionServer biasedServer =
TEST_UTIL.getMiniHBaseCluster().getRegionServer(0);
@@ -150,7 +150,7 @@ public class TestMasterBalanceThrottling {
master.move(regionInfo.getEncodedNameAsBytes(),
Bytes.toBytes(biasedServer.getServerName().getServerName()));
}
- while
(master.getAssignmentManager().getRegionStates().getRegionsInTransitionCount()
> 0) {
+ while (master.getAssignmentManager().getRegionsInTransitionCount() > 0) {
Thread.sleep(100);
}
}
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterDryRunBalancer.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterDryRunBalancer.java
index 411cc0eb6d1..c5a2880b96d 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterDryRunBalancer.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterDryRunBalancer.java
@@ -119,6 +119,6 @@ public class TestMasterDryRunBalancer {
private void waitForRegionsToSettle(HMaster master) {
Waiter.waitFor(TEST_UTIL.getConfiguration(), 60_000,
- () ->
master.getAssignmentManager().getRegionStates().getRegionsInTransitionCount()
<= 0);
+ () -> master.getAssignmentManager().getRegionsInTransitionCount() <= 0);
}
}
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/AssignmentTestingUtil.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/AssignmentTestingUtil.java
index 662d39a0c41..a36c5bb2da8 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/AssignmentTestingUtil.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/AssignmentTestingUtil.java
@@ -50,7 +50,7 @@ public final class AssignmentTestingUtil {
public static void waitForRegionToBeInTransition(final HBaseTestingUtility
util,
final RegionInfo hri) throws Exception {
- while
(!getMaster(util).getAssignmentManager().getRegionStates().isRegionInTransition(hri))
{
+ while (!getMaster(util).getAssignmentManager().isRegionInTransition(hri)) {
Threads.sleep(10);
}
}
@@ -141,7 +141,7 @@ public final class AssignmentTestingUtil {
RegionStateNode regionNode =
am.getRegionStates().getRegionStateNode(regionInfo);
// Wait until the region has already been open, or we have a TRSP along
with it.
Waiter.waitFor(am.getConfiguration(), 30000,
- () -> regionNode.isInState(State.OPEN) || regionNode.isInTransition());
+ () -> regionNode.isInState(State.OPEN) ||
regionNode.isTransitionScheduled());
TransitRegionStateProcedure proc = regionNode.getProcedure();
regionNode.lock();
try {
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
index 538476be306..7ec02b66790 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
@@ -124,6 +124,7 @@ public class MockMasterServices extends
MockNoopMasterServices {
this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
this.serverManager = new ServerManager(this, new DummyRegionServerList());
this.tableStateManager = mock(TableStateManager.class);
+ assignmentManager.initializationPostMetaOnline();
when(this.tableStateManager.getTableState(any())).thenReturn(new
TableState(
TableName.valueOf("AnyTableNameSetInMockMasterServcies"),
TableState.State.ENABLED));
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerBase.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerBase.java
index 9bcbabf1b02..667a1ea9bd5 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerBase.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerBase.java
@@ -280,7 +280,7 @@ public abstract class TestAssignmentManagerBase {
TransitRegionStateProcedure proc;
regionNode.lock();
try {
- assertFalse(regionNode.isInTransition());
+ assertFalse(regionNode.isTransitionScheduled());
proc = TransitRegionStateProcedure
.unassign(master.getMasterProcedureExecutor().getEnvironment(), hri);
regionNode.setProcedure(proc);
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerUtil.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerUtil.java
index 2d87646a62f..6a599f12fcc 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerUtil.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManagerUtil.java
@@ -129,6 +129,6 @@ public class TestAssignmentManagerUtil {
IntStream.range(0, REGION_REPLICATION)
.mapToObj(i -> RegionReplicaUtil.getRegionInfoForReplica(regionA, i))
.map(AM.getRegionStates()::getRegionStateNode).forEachOrdered(
- rn -> assertFalse("Should have unset the proc for " + rn,
rn.isInTransition()));
+ rn -> assertFalse("Should have unset the proc for " + rn,
rn.isTransitionScheduled()));
}
}
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestTransitRegionStateProcedure.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestTransitRegionStateProcedure.java
index dc8f82295a4..6120de50e38 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestTransitRegionStateProcedure.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestTransitRegionStateProcedure.java
@@ -99,15 +99,15 @@ public class TestTransitRegionStateProcedure {
HMaster master = UTIL.getHBaseCluster().getMaster();
AssignmentManager am = master.getAssignmentManager();
RegionStateNode regionNode =
am.getRegionStates().getRegionStateNode(proc.getRegion());
- assertFalse(regionNode.isInTransition());
+ assertFalse(regionNode.isTransitionScheduled());
regionNode.setProcedure(proc);
- assertTrue(regionNode.isInTransition());
+ assertTrue(regionNode.isTransitionScheduled());
ProcedureExecutor<MasterProcedureEnv> procExec =
master.getMasterProcedureExecutor();
ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
long procId = procExec.submitProcedure(proc);
MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec,
procId);
regionNode = am.getRegionStates().getRegionStateNode(proc.getRegion());
- assertFalse(regionNode.isInTransition());
+ assertFalse(regionNode.isTransitionScheduled());
}
@Test
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredStochasticLoadBalancer.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredStochasticLoadBalancer.java
index e5be89e8ecb..967217af008 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredStochasticLoadBalancer.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredStochasticLoadBalancer.java
@@ -50,6 +50,7 @@ import org.apache.hadoop.hbase.favored.FavoredNodesPlan;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.ServerManager;
+import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.regionserver.HRegion;
@@ -261,7 +262,9 @@ public class TestFavoredStochasticLoadBalancer extends
BalancerTestBase {
// Balancer should unassign the region
assertTrue("Balancer did not run", admin.balancer());
- TEST_UTIL.waitUntilNoRegionsInTransition();
+ TEST_UTIL.waitUntilNoRegionTransitScheduled();
+ assertEquals("One region should be unassigned", 1,
+ master.getAssignmentManager().getRegionsInTransitionCount());
admin.assign(region.getEncodedNameAsBytes());
TEST_UTIL.waitUntilNoRegionsInTransition(60000);
@@ -382,7 +385,8 @@ public class TestFavoredStochasticLoadBalancer extends
BalancerTestBase {
// Lets kill all the RS that are favored nodes for this region.
stopServersAndWaitUntilProcessed(currentFN);
- final RegionStates regionStates =
master.getAssignmentManager().getRegionStates();
+ final AssignmentManager am = master.getAssignmentManager();
+ final RegionStates regionStates = am.getRegionStates();
TEST_UTIL.waitFor(10000, new Waiter.Predicate<Exception>() {
@Override
public boolean evaluate() throws Exception {
@@ -403,7 +407,7 @@ public class TestFavoredStochasticLoadBalancer extends
BalancerTestBase {
FavoredNodeAssignmentHelper helper = new
FavoredNodeAssignmentHelper(serversForNewFN, conf);
helper.initialize();
- for (RegionStateNode regionState : regionStates.getRegionsInTransition()) {
+ for (RegionStateNode regionState : am.getRegionsInTransition()) {
RegionInfo regionInfo = regionState.getRegionInfo();
List<ServerName> newFavoredNodes =
helper.generateFavoredNodes(regionInfo);
assertNotNull(newFavoredNodes);
@@ -445,7 +449,8 @@ public class TestFavoredStochasticLoadBalancer extends
BalancerTestBase {
// Lets kill all the RS that are favored nodes for this region.
stopServersAndWaitUntilProcessed(currentFN);
- final RegionStates regionStatesBeforeMaster =
master.getAssignmentManager().getRegionStates();
+ final AssignmentManager am = master.getAssignmentManager();
+ final RegionStates regionStatesBeforeMaster = am.getRegionStates();
TEST_UTIL.waitFor(10000, new Waiter.Predicate<Exception>() {
@Override
public boolean evaluate() throws Exception {
@@ -457,7 +462,7 @@ public class TestFavoredStochasticLoadBalancer extends
BalancerTestBase {
regionStatesBeforeMaster.getRegionState(region).isFailedOpen());
List<RegionInfo> rit = Lists.newArrayList();
- for (RegionStateNode regionState :
regionStatesBeforeMaster.getRegionsInTransition()) {
+ for (RegionStateNode regionState : am.getRegionsInTransition()) {
RegionInfo regionInfo = regionState.getRegionInfo();
LOG.debug("Region in transition after stopping FN's: " + regionInfo);
rit.add(regionInfo);
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/http/TestMasterStatusServlet.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/http/TestMasterStatusServlet.java
index a2c079661ea..a582f2d0697 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/http/TestMasterStatusServlet.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/http/TestMasterStatusServlet.java
@@ -97,7 +97,7 @@ public class TestMasterStatusServlet {
regionsInTransition
.add(new RegionState(FAKE_HRI, RegionState.State.CLOSING, 12345L,
FAKE_HOST));
Mockito.doReturn(rs).when(am).getRegionStates();
- Mockito.doReturn(regionsInTransition).when(rs).getRegionsInTransition();
+ Mockito.doReturn(regionsInTransition).when(am).getRegionsInTransition();
Mockito.doReturn(am).when(master).getAssignmentManager();
Mockito.doReturn(serverManager).when(master).getServerManager();
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/janitor/TestCatalogJanitorInMemoryStates.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/janitor/TestCatalogJanitorInMemoryStates.java
index 86e4c4a4bf9..8c5e4e944ab 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/janitor/TestCatalogJanitorInMemoryStates.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/janitor/TestCatalogJanitorInMemoryStates.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.master.janitor;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
@@ -116,8 +117,8 @@ public class TestCatalogJanitorInMemoryStates {
LOG.info("Daughter regions: " + daughters);
assertNotNull("Should have found daughter regions for " + parent,
daughters);
- assertTrue("Parent region should exist in RegionStates",
- am.getRegionStates().isRegionInRegionStates(parent.getRegion()));
+ assertNotNull("Parent region should exist in RegionStates",
+
am.getRegionStates().getRegionStateNodeFromName(parent.getRegion().getRegionName()));
assertTrue("Parent region should exist in ServerManager",
sm.isRegionInServerManagerStates(parent.getRegion()));
@@ -140,8 +141,8 @@ public class TestCatalogJanitorInMemoryStates {
}
});
- assertFalse("Parent region should have been removed from RegionStates",
- am.getRegionStates().isRegionInRegionStates(parent.getRegion()));
+ assertNull("Parent region should have been removed from RegionStates",
+
am.getRegionStates().getRegionStateNodeFromName(parent.getRegion().getRegionName()));
assertFalse("Parent region should have been removed from ServerManager",
sm.isRegionInServerManagerStates(parent.getRegion()));
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
index 4c196f63d91..55c2aa5c370 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
@@ -107,6 +107,7 @@ public class MasterProcedureTestingUtility {
am.setupRIT(procExec.getActiveProceduresNoCopy().stream().filter(p
-> !p.isSuccess())
.filter(p -> p instanceof TransitRegionStateProcedure)
.map(p -> (TransitRegionStateProcedure)
p).collect(Collectors.toList()));
+ am.initializationPostMetaOnline();
return null;
}
},
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionFileSystem.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionFileSystem.java
index 49c2f83e8b3..fe4ecadb66d 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionFileSystem.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionFileSystem.java
@@ -114,8 +114,7 @@ public class TestHRegionFileSystem {
hcdA.setValue(HStore.BLOCK_STORAGE_POLICY_KEY, "ONE_SSD");
admin.modifyColumnFamily(TABLE_NAME, hcdA);
while (
-
TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates()
- .hasRegionsInTransition()
+
TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().hasRegionsInTransition()
) {
Thread.sleep(200);
LOG.debug("Waiting on table to finish schema altering");
@@ -125,8 +124,7 @@ public class TestHRegionFileSystem {
hcdB.setStoragePolicy("ALL_SSD");
admin.modifyColumnFamily(TABLE_NAME, hcdB);
while (
-
TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates()
- .hasRegionsInTransition()
+
TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().hasRegionsInTransition()
) {
Thread.sleep(200);
LOG.debug("Waiting on table to finish schema altering");
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java
index 2f4bfbe7dbe..6f386cbd5ad 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java
@@ -58,7 +58,6 @@ import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.MasterRpcServices;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
-import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
@@ -163,15 +162,17 @@ public class TestRegionMergeTransactionOnCluster {
: mergedRegions.getSecond();
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
AssignmentManager am = cluster.getMaster().getAssignmentManager();
- RegionStates regionStates = am.getRegionStates();
- // We should not be able to assign it again
+ // We should not be able to assign it again, but the assign currently succeeds here. The
+ // assertions below are too weak to detect that; HBASE-29692 was filed to resolve this.
am.assign(hri);
- assertFalse("Merged region can't be assigned",
regionStates.isRegionInTransition(hri));
+ assertFalse("Merged region can't be assigned",
+ am.getRegionStates().getRegionStateNode(hri).isTransitionScheduled());
// We should not be able to unassign it either
am.unassign(hri);
- assertFalse("Merged region can't be unassigned",
regionStates.isRegionInTransition(hri));
+ assertFalse("Merged region can't be unassigned",
+ am.getRegionStates().getRegionStateNode(hri).isTransitionScheduled());
table.close();
} finally {
@@ -562,11 +563,11 @@ public class TestRegionMergeTransactionOnCluster {
enabled.get() && req.getTransition(0).getTransitionCode() ==
TransitionCode.READY_TO_MERGE
&& !resp.hasErrorMessage()
) {
- RegionStates regionStates =
myMaster.getAssignmentManager().getRegionStates();
- for (RegionState regionState :
regionStates.getRegionsStateInTransition()) {
+ AssignmentManager am = myMaster.getAssignmentManager();
+ for (RegionState regionState : am.getRegionsStateInTransition()) {
// Find the merging_new region and remove it
if (regionState.isMergingNew()) {
- regionStates.deleteRegion(regionState.getRegion());
+ am.getRegionStates().deleteRegion(regionState.getRegion());
}
}
}
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index 91bbea57530..32e135d20ae 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -909,7 +909,7 @@ public class TestSplitTransactionOnCluster {
} catch (DoNotRetryIOException e) {
// Expected
}
- assertFalse("Split region can't be assigned",
regionStates.isRegionInTransition(hri));
+ assertFalse("Split region can't be assigned",
am.isRegionInTransition(hri));
assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
// We should not be able to unassign it either
@@ -919,7 +919,7 @@ public class TestSplitTransactionOnCluster {
} catch (DoNotRetryIOException e) {
// Expected
}
- assertFalse("Split region can't be unassigned",
regionStates.isRegionInTransition(hri));
+ assertFalse("Split region can't be unassigned",
am.isRegionInTransition(hri));
assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
} finally {
admin.balancerSwitch(true, false);
@@ -1145,12 +1145,11 @@ public class TestSplitTransactionOnCluster {
&&
req.getTransition(0).getTransitionCode().equals(TransitionCode.READY_TO_SPLIT)
&& !resp.hasErrorMessage()
) {
- RegionStates regionStates =
myMaster.getAssignmentManager().getRegionStates();
- for (RegionStateNode regionState :
regionStates.getRegionsInTransition()) {
- /*
- * TODO!!!! // Find the merging_new region and remove it if
(regionState.isSplittingNew())
- * { regionStates.deleteRegion(regionState.getRegion()); }
- */
+ AssignmentManager am = myMaster.getAssignmentManager();
+ for (RegionStateNode regionState : am.getRegionsInTransition()) {
+ if (regionState.toRegionState().isSplittingNew()) {
+
am.getRegionStates().deleteRegion(regionState.toRegionState().getRegion());
+ }
}
}
return resp;