This is an automated email from the ASF dual-hosted git repository.

stack pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 13d5a67  HBASE-20368 Fix RIT stuck when a rsgroup has no online 
servers but AM's pendingAssginQueue is cleared
13d5a67 is described below

commit 13d5a670942e0c78586ff5dbd372854465412d84
Author: haxiaolin <[email protected]>
AuthorDate: Wed Apr 11 18:08:08 2018 +0800

    HBASE-20368 Fix RIT stuck when a rsgroup has no online servers but AM's 
pendingAssginQueue is cleared
---
 .../hbase/rsgroup/RSGroupBasedLoadBalancer.java    |  10 ++
 .../hadoop/hbase/rsgroup/RSGroupTestingUtil.java   |  59 +++++++++
 .../rsgroup/TestAssignmentOnRSGroupCrash.java      | 144 +++++++++++++++++++++
 .../hadoop/hbase/rsgroup/TestRSGroupsBase.java     |  18 +--
 .../hbase/master/assignment/AssignmentManager.java |   5 +
 5 files changed, 219 insertions(+), 17 deletions(-)

diff --git 
a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java
 
b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java
index ffc3b62..8b40d25 100644
--- 
a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java
+++ 
b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java
@@ -216,6 +216,16 @@ public class RSGroupBasedLoadBalancer implements 
RSGroupableBalancer {
         if(candidateList.size() > 0) {
           assignments.putAll(this.internalBalancer.retainAssignment(
               currentAssignmentMap, candidateList));
+        } else{
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("No available server to assign regions: " + 
regionList.toString());
+          }
+          for(RegionInfo region : regionList) {
+            if (!assignments.containsKey(LoadBalancer.BOGUS_SERVER_NAME)) {
+              assignments.put(LoadBalancer.BOGUS_SERVER_NAME, new 
ArrayList<>());
+            }
+            assignments.get(LoadBalancer.BOGUS_SERVER_NAME).add(region);
+          }
         }
       }
 
diff --git 
a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/RSGroupTestingUtil.java
 
b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/RSGroupTestingUtil.java
new file mode 100644
index 0000000..b722401
--- /dev/null
+++ 
b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/RSGroupTestingUtil.java
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.rsgroup;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+import org.apache.hadoop.hbase.net.Address;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.yetus.audience.InterfaceStability;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
[email protected]
[email protected]
+public abstract class RSGroupTestingUtil {
+  private static final Logger LOG = 
LoggerFactory.getLogger(RSGroupTestingUtil.class);
+
+  private RSGroupTestingUtil() {
+  }
+
+  public static RSGroupInfo addRSGroup(final RSGroupAdmin rsGroupAdmin, String 
groupName,
+      int groupRSCount) throws IOException {
+    RSGroupInfo defaultInfo = 
rsGroupAdmin.getRSGroupInfo(RSGroupInfo.DEFAULT_GROUP);
+    assertTrue(defaultInfo != null);
+    assertTrue(defaultInfo.getServers().size() >= groupRSCount);
+    rsGroupAdmin.addRSGroup(groupName);
+
+    Set<Address> set = new HashSet<>();
+    for(Address server: defaultInfo.getServers()) {
+      if(set.size() == groupRSCount) {
+        break;
+      }
+      set.add(server);
+    }
+    rsGroupAdmin.moveServers(set, groupName);
+    RSGroupInfo result = rsGroupAdmin.getRSGroupInfo(groupName);
+    assertTrue(result.getServers().size() >= groupRSCount);
+    return result;
+  }
+
+}
diff --git 
a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestAssignmentOnRSGroupCrash.java
 
b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestAssignmentOnRSGroupCrash.java
new file mode 100644
index 0000000..bdae002
--- /dev/null
+++ 
b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestAssignmentOnRSGroupCrash.java
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.rsgroup;
+
+import static org.apache.hadoop.hbase.util.Threads.sleep;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.net.Address;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Category({ MasterTests.class, LargeTests.class })
+public class TestAssignmentOnRSGroupCrash {
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestAssignmentOnRSGroupCrash.class);
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(TestAssignmentOnRSGroupCrash.class);
+
+  private static final TableName TEST_TABLE = TableName.valueOf("testb");
+  private static final String FAMILY_STR = "f";
+  private static final byte[] FAMILY = Bytes.toBytes(FAMILY_STR);
+  private static final int NUM_RS = 3;
+
+  private HBaseTestingUtility UTIL;
+
+  private static RSGroupAdmin rsGroupAdmin;
+
+  private static void setupConf(Configuration conf) {
+    conf.set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, 
RSGroupAdminEndpoint.class.getName());
+    conf.set(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, 
RSGroupBasedLoadBalancer.class.getName());
+  }
+
+  @Before
+  public void setup() throws Exception {
+    UTIL = new HBaseTestingUtility();
+
+    setupConf(UTIL.getConfiguration());
+    UTIL.startMiniCluster(NUM_RS);
+
+    UTIL.createTable(TEST_TABLE, new byte[][] { FAMILY },
+        new byte[][] { Bytes.toBytes("B"), Bytes.toBytes("D"), 
Bytes.toBytes("F"),
+            Bytes.toBytes("L") });
+    rsGroupAdmin = new VerifyingRSGroupAdminClient(new 
RSGroupAdminClient(UTIL.getConnection()),
+        UTIL.getConfiguration());
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    UTIL.shutdownMiniCluster();
+  }
+
+  @Test
+  public void testKillAllRSInGroupAndThenStart() throws Exception {
+    // create a rsgroup and move one regionserver to it
+    String groupName = "my_group";
+    int groupRSCount = 1;
+    RSGroupTestingUtil.addRSGroup(rsGroupAdmin, groupName, groupRSCount);
+    Set<TableName> toAddTables = new HashSet<>();
+    toAddTables.add(TEST_TABLE);
+    rsGroupAdmin.moveTables(toAddTables, groupName);
+    RSGroupInfo rsGroupInfo = rsGroupAdmin.getRSGroupInfo(groupName);
+    LOG.debug("my_group: " + rsGroupInfo.toString());
+    Set<Address> servers = rsGroupInfo.getServers();
+    ServerName myGroupRS = null;
+    for (int i = 0; i < NUM_RS; ++i) {
+      ServerName sn = 
UTIL.getMiniHBaseCluster().getRegionServer(i).getServerName();
+      if (servers.contains(sn.getAddress())) {
+        myGroupRS = sn;
+        break;
+      }
+    }
+    assertNotNull(myGroupRS);
+    checkRegionsOnline(TEST_TABLE, true);
+
+    // stop regionserver in the rsgroup, and table regions will be offline
+    UTIL.getMiniHBaseCluster().stopRegionServer(myGroupRS);
+    // better wait for a while for region reassign
+    sleep(10000);
+    
assertEquals(UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size(),
+        NUM_RS - servers.size());
+    checkRegionsOnline(TEST_TABLE, false);
+
+    // move another regionserver to the rsgroup
+    // in this case, moving another region server can be replaced by 
restarting the regionserver
+    // mentioned before
+    RSGroupInfo defaultInfo = 
rsGroupAdmin.getRSGroupInfo(RSGroupInfo.DEFAULT_GROUP);
+    Set<Address> set = new HashSet<>();
+    for (Address server : defaultInfo.getServers()) {
+      if (set.size() == groupRSCount) {
+        break;
+      }
+      set.add(server);
+    }
+    rsGroupAdmin.moveServers(set, groupName);
+
+    // wait and check if table regions are online
+    sleep(10000);
+    checkRegionsOnline(TEST_TABLE, true);
+  }
+
+  private void checkRegionsOnline(TableName tableName, boolean isOnline) 
throws IOException {
+    for (RegionInfo hri : UTIL.getHBaseAdmin().getTableRegions(tableName)) {
+      
assertTrue(UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates()
+          .isRegionOnline(hri) == isOnline);
+    }
+  }
+}
diff --git 
a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java
 
b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java
index f28036c..4a1b2f2 100644
--- 
a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java
+++ 
b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java
@@ -27,7 +27,6 @@ import java.io.IOException;
 import java.security.SecureRandom;
 import java.util.ArrayList;
 import java.util.EnumSet;
-import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
@@ -100,22 +99,7 @@ public abstract class TestRSGroupsBase {
 
   protected RSGroupInfo addGroup(String groupName, int serverCount)
       throws IOException, InterruptedException {
-    RSGroupInfo defaultInfo = 
rsGroupAdmin.getRSGroupInfo(RSGroupInfo.DEFAULT_GROUP);
-    assertTrue(defaultInfo != null);
-    assertTrue(defaultInfo.getServers().size() >= serverCount);
-    rsGroupAdmin.addRSGroup(groupName);
-
-    Set<Address> set = new HashSet<>();
-    for(Address server: defaultInfo.getServers()) {
-      if(set.size() == serverCount) {
-        break;
-      }
-      set.add(server);
-    }
-    rsGroupAdmin.moveServers(set, groupName);
-    RSGroupInfo result = rsGroupAdmin.getRSGroupInfo(groupName);
-    assertTrue(result.getServers().size() >= serverCount);
-    return result;
+    return RSGroupTestingUtil.addRSGroup(rsGroupAdmin, groupName, serverCount);
   }
 
   void removeGroup(String groupName) throws IOException {
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 6e3d58b..69b552e 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -1764,6 +1764,11 @@ public class AssignmentManager implements ServerListener 
{
 
     if (plan.isEmpty()) return;
 
+    List<RegionInfo> bogusRegions = 
plan.remove(LoadBalancer.BOGUS_SERVER_NAME);
+    if (bogusRegions != null && !bogusRegions.isEmpty()) {
+      addToPendingAssignment(regions, bogusRegions);
+    }
+
     int evcount = 0;
     for (Map.Entry<ServerName, List<RegionInfo>> entry: plan.entrySet()) {
       final ServerName server = entry.getKey();

Reply via email to