sunhelly commented on a change in pull request #323: HBASE-22414 Interruption
of moving regions in RSGroup will cause regi…
URL: https://github.com/apache/hbase/pull/323#discussion_r303273393
##########
File path:
hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsAdmin2.java
##########
@@ -459,4 +466,206 @@ public boolean evaluate() throws Exception {
Assert.assertEquals(null, rsGroupAdmin.getRSGroupInfo(fooGroup.getName()));
}
+ @Test
+ public void testFailedMoveBeforeRetryExhaustedWhenMoveServer() throws
Exception {
+ String groupName = getGroupName(name.getMethodName());
+ rsGroupAdmin.addRSGroup(groupName);
+ final RSGroupInfo newGroup = rsGroupAdmin.getRSGroupInfo(groupName);
+ Pair<ServerName, RegionStateNode> gotPair =
createTableAndSetARegionState(newGroup,
+ 10);
+
+ // start thread to recover region state
+ final ServerName movedServer = gotPair.getFirst();
+ final RegionStateNode rsn = gotPair.getSecond();
+ AtomicBoolean changed = new AtomicBoolean(false);
+ Thread t1 = recoverRegionStateThread(movedServer,
+ server -> master.getAssignmentManager().getRegionsOnServer(movedServer),
rsn, changed);
+ t1.start();
+
+ // move target server to group
+ Thread t2 = new Thread(() -> {
+ LOG.info("thread2 start running, to move regions");
+ try {
+ rsGroupAdmin.moveServers(Sets.newHashSet(movedServer.getAddress()),
newGroup.getName());
+ } catch (IOException e) {
+ LOG.error("move server error", e);
+ }
+ });
+ t2.start();
+
+ t1.join();
+ t2.join();
+
+ TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
+ @Override
+ public boolean evaluate() {
+ if (changed.get()) {
+ return
master.getAssignmentManager().getRegionsOnServer(movedServer).size() == 0 &&
!rsn
+ .getRegionLocation().equals(movedServer);
+ }
+ return false;
+ }
+ });
+ }
+
+ @Test
+ public void testFailedMoveBeforeRetryExhaustedWhenMoveTable() throws
Exception {
+ final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()),
1);
+ Pair<ServerName, RegionStateNode> gotPair =
createTableAndSetARegionState(newGroup, 5);
+
+ // move table to group
+ Thread t2 = new Thread(() -> {
+ LOG.info("thread2 start running, to move regions");
+ try {
+ rsGroupAdmin.moveTables(Sets.newHashSet(tableName),
newGroup.getName());
+ } catch (IOException e) {
+ LOG.error("move server error", e);
+ }
+ });
+ t2.start();
+
+ // start thread to recover region state
+ final ServerName ss = gotPair.getFirst();
+ final RegionStateNode rsn = gotPair.getSecond();
+ AtomicBoolean changed = new AtomicBoolean(false);
+
+ Thread t1 = recoverRegionStateThread(ss, server -> {
+ List<RegionInfo> regions =
master.getAssignmentManager().getRegionsOnServer(ss);
+ List<RegionInfo> tableRegions = new ArrayList<>();
+ for (RegionInfo regionInfo : regions) {
+ if (regionInfo.getTable().equals(tableName)) {
+ tableRegions.add(regionInfo);
+ }
+ }
+ return tableRegions;
+ }, rsn, changed);
+ t1.start();
+
+ t1.join();
+ t2.join();
+
+ TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
+ @Override
+ public boolean evaluate() {
+ if (changed.get()) {
+ boolean serverHasTableRegions = false;
+ for (RegionInfo regionInfo :
master.getAssignmentManager().getRegionsOnServer(ss)) {
+ if (regionInfo.getTable().equals(tableName)) {
+ serverHasTableRegions = true;
+ break;
+ }
+ }
+ return !serverHasTableRegions && !rsn.getRegionLocation().equals(ss);
+ }
+ return false;
+ }
+ });
+ }
+
+ private <T> Thread recoverRegionStateThread(T owner, Function<T,
List<RegionInfo>> getRegions,
+ RegionStateNode rsn, AtomicBoolean changed){
+ return new Thread(() -> {
+ LOG.info("thread1 start running, will recover region state");
+ long current = System.currentTimeMillis();
+ // Wait until the only region left is the one whose state we changed, then
+ // recover its state. The wait time is set according to the max number of
+ // retries: all regions except the failed one are moved in a single retry,
+ // and the mover sleeps 1s before the next retry.
+ while (System.currentTimeMillis() - current <= DEFAULT_MAX_RETRY_VALUE *
1000) {
+ List<RegionInfo> regions = getRegions.apply(owner);
+ LOG.debug("server table region size is:{}", regions.size());
+ assert regions.size() >= 1;
+ // When exactly one region is left, we can conclude that the move operation
+ // hit the exception caused by the strange region state.
+ if (regions.size() == 1) {
+ assertEquals(regions.get(0).getRegionNameAsString(),
+ rsn.getRegionInfo().getRegionNameAsString());
+ rsn.setState(RegionState.State.OPEN);
+ LOG.info("set region {} state OPEN",
rsn.getRegionInfo().getRegionNameAsString());
+ changed.set(true);
+ break;
+ }
+ sleep(5000);
+ }
+ });
+ }
+
+ @Test
+ public void testFailedMoveWhenMoveServer() throws Exception{
+ String groupName = getGroupName(name.getMethodName());
+ rsGroupAdmin.addRSGroup(groupName);
+ final RSGroupInfo newGroup = rsGroupAdmin.getRSGroupInfo(groupName);
+ Pair<ServerName, RegionStateNode> gotPair =
createTableAndSetARegionState(newGroup,
+ 10);
+ try{
+
rsGroupAdmin.moveServers(Sets.newHashSet(gotPair.getFirst().getAddress()),
+ newGroup.getName());
+ fail("move servers to group should fail");
+ }catch (IOException e){
+ assertTrue(e.getMessage().contains(
+ gotPair.getSecond().getRegionInfo().getRegionNameAsString()));
+ }
+ }
+
+ @Test
+ public void testFailedMoveWhenMoveTable() throws Exception{
+ final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()),
1);
+ Pair<ServerName, RegionStateNode> gotPair =
createTableAndSetARegionState(newGroup, 5);
+ try{
+ rsGroupAdmin.moveTables(Sets.newHashSet(tableName), newGroup.getName());
+ fail("move tables to group should fail");
+ }catch (IOException e){
+ assertTrue(e.getMessage().contains(
+ gotPair.getSecond().getRegionInfo().getRegionNameAsString()));
+ }
+ }
+
+ private Pair<ServerName, RegionStateNode>
createTableAndSetARegionState(RSGroupInfo rsGroupInfo,
+ int tableRegionCount) throws Exception{
+ final byte[] familyNameBytes = Bytes.toBytes("f");
+ // All the regions created below will be assigned to the default group.
+ TEST_UTIL.createMultiRegionTable(tableName, familyNameBytes,
tableRegionCount);
+ TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
+ @Override
+ public boolean evaluate() throws Exception {
+ List<String> regions = getTableRegionMap().get(tableName);
+ if (regions == null) {
+ return false;
+ }
+ return getTableRegionMap().get(tableName).size() >= tableRegionCount;
+ }
+ });
+
+ return setARegionState(rsGroupInfo);
+ }
+
+ /**
+ * Randomly choose a region to set state.
+ * @param newGroup target group
+ * @return source server of region, and region state
+ * @throws IOException if methods called throw
+ */
+ private Pair<ServerName, RegionStateNode> setARegionState(RSGroupInfo
newGroup)
Review comment:
Thanks, I'll change the method name.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services