HBASE-20752 Make sure the regions are truly reopened after ReopenTableRegionsProcedure


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/7b716c96
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/7b716c96
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/7b716c96

Branch: refs/heads/HBASE-19064
Commit: 7b716c964bc7c692c1222812974ae75428d50e65
Parents: bc9f9ae
Author: zhangduo <zhang...@apache.org>
Authored: Thu Jun 21 10:14:57 2018 +0800
Committer: zhangduo <zhang...@apache.org>
Committed: Fri Jun 22 14:04:33 2018 +0800

----------------------------------------------------------------------
 .../hbase/shaded/protobuf/ProtobufUtil.java     |  17 +++
 .../src/main/protobuf/HBase.proto               |   6 +
 .../src/main/protobuf/MasterProcedure.proto     |   5 +-
 .../master/assignment/AssignmentManager.java    |  23 +---
 .../master/assignment/MoveRegionProcedure.java  |  15 +--
 .../master/assignment/RegionStateStore.java     |   8 +-
 .../hbase/master/assignment/RegionStates.java   | 123 +++++++++++++++----
 .../master/procedure/ModifyTableProcedure.java  | 122 ++++++------------
 .../procedure/ReopenTableRegionsProcedure.java  |  78 +++++++++---
 9 files changed, 244 insertions(+), 153 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/7b716c96/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java
index 717ddab..24d2ab7 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java
@@ -57,6 +57,7 @@ import org.apache.hadoop.hbase.ExtendedCellBuilderFactory;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HBaseIOException;
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.NamespaceDescriptor;
 import org.apache.hadoop.hbase.ServerName;
@@ -3130,6 +3131,22 @@ public final class ProtobufUtil {
     return rib.build();
   }
 
+  public static HBaseProtos.RegionLocation toRegionLocation(HRegionLocation loc) {
+    HBaseProtos.RegionLocation.Builder builder = HBaseProtos.RegionLocation.newBuilder();
+    builder.setRegionInfo(toRegionInfo(loc.getRegion()));
+    if (loc.getServerName() != null) {
+      builder.setServerName(toServerName(loc.getServerName()));
+    }
+    builder.setSeqNum(loc.getSeqNum());
+    return builder.build();
+  }
+
+  public static HRegionLocation toRegionLocation(HBaseProtos.RegionLocation proto) {
+    org.apache.hadoop.hbase.client.RegionInfo regionInfo = toRegionInfo(proto.getRegionInfo());
+    ServerName serverName = proto.hasServerName() ? toServerName(proto.getServerName()) : null;
+    return new HRegionLocation(regionInfo, serverName, proto.getSeqNum());
+  }
+
   public static List<SnapshotDescription> toSnapshotDescriptionList(
       GetCompletedSnapshotsResponse response, Pattern pattern) {
     return response.getSnapshotsList().stream().map(ProtobufUtil::createSnapshotDesc)

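The two toRegionLocation overloads above form a round trip between HRegionLocation and the
new HBaseProtos.RegionLocation message, with a missing server name mapped to an unset
optional field. A minimal sketch of that round trip, assuming the usual client classes are
on the classpath (the table name, server name and sequence number below are made up for
illustration):

    // Illustrative only: exercise the HRegionLocation <-> HBaseProtos.RegionLocation round trip.
    RegionInfo info = RegionInfoBuilder.newBuilder(TableName.valueOf("demo")).build();
    HRegionLocation loc =
      new HRegionLocation(info, ServerName.valueOf("rs1.example.org,16020,1"), 42L);

    HBaseProtos.RegionLocation proto = ProtobufUtil.toRegionLocation(loc);
    HRegionLocation back = ProtobufUtil.toRegionLocation(proto);

    assert back.getSeqNum() == 42L;                          // seq_num survives the round trip
    assert back.getServerName().equals(loc.getServerName()); // server_name kept when present
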
http://git-wip-us.apache.org/repos/asf/hbase/blob/7b716c96/hbase-protocol-shaded/src/main/protobuf/HBase.proto
----------------------------------------------------------------------
diff --git a/hbase-protocol-shaded/src/main/protobuf/HBase.proto b/hbase-protocol-shaded/src/main/protobuf/HBase.proto
index 0af2ffd..d06bc8b 100644
--- a/hbase-protocol-shaded/src/main/protobuf/HBase.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/HBase.proto
@@ -267,4 +267,10 @@ message FlushedRegionSequenceId {
 
 message FlushedSequenceId {
   repeated FlushedRegionSequenceId regionSequenceId = 1;
+}
+
+message RegionLocation {
+  required RegionInfo region_info = 1;
+  optional ServerName server_name = 2;
+  required int64 seq_num = 3;
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hbase/blob/7b716c96/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
----------------------------------------------------------------------
diff --git a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
index 0b4e1d7..39d2824 100644
--- a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
@@ -441,11 +441,14 @@ message DisablePeerStateData {
 }
 
 enum ReopenTableRegionsState {
-  REOPEN_TABLE_REGIONS_REOPEN_ALL_REGIONS = 1;
+  REOPEN_TABLE_REGIONS_GET_REGIONS = 1;
+  REOPEN_TABLE_REGIONS_REOPEN_REGIONS = 2;
+  REOPEN_TABLE_REGIONS_CONFIRM_REOPENED = 3;
 }
 
 message ReopenTableRegionsStateData {
   required TableName table_name = 1;
+  repeated RegionLocation region = 2;
 }
 
 enum InitMetaState {

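The single REOPEN_TABLE_REGIONS_REOPEN_ALL_REGIONS step is replaced by a three-step loop
(get regions, reopen, confirm), and the captured region list is now persisted so a restarted
master can resume confirming. The repeated RegionLocation field follows the seq_num
convention documented in RegionStates below: a non-negative value is the openSeqNum of a
region that was OPEN, while -1 marks a region that was still OPENING. A hedged sketch of
that convention (the helper below is hypothetical, not part of the patch; assumes the usual
java.util imports):

    // Hypothetical helper: split the captured locations by the seq_num convention.
    // true bucket = was OPEN (a reopen can be scheduled now), false bucket = was OPENING.
    static Map<Boolean, List<HRegionLocation>> splitBySchedulable(List<HRegionLocation> regions) {
      return regions.stream().collect(Collectors.partitioningBy(l -> l.getSeqNum() >= 0));
    }
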
http://git-wip-us.apache.org/repos/asf/hbase/blob/7b716c96/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 0736435..dbfb6d1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -695,18 +695,6 @@ public class AssignmentManager implements ServerListener {
     return procs.toArray(UNASSIGN_PROCEDURE_ARRAY_TYPE);
   }
 
-  public MoveRegionProcedure[] createReopenProcedures(final Collection<RegionInfo> regionInfo)
-  throws IOException {
-    final MoveRegionProcedure[] procs = new MoveRegionProcedure[regionInfo.size()];
-    int index = 0;
-    for (RegionInfo hri: regionInfo) {
-      final ServerName serverName = regionStates.getRegionServerOfRegion(hri);
-      final RegionPlan plan = new RegionPlan(hri, serverName, serverName);
-      procs[index++] = createMoveRegionProcedure(plan);
-    }
-    return procs;
-  }
-
   /**
   * Called by things like DisableTableProcedure to get a list of UnassignProcedure
    * to unassign the regions of the table.
@@ -745,22 +733,21 @@ public class AssignmentManager implements ServerListener {
     return proc;
   }
 
-  public MoveRegionProcedure createMoveRegionProcedure(final RegionPlan plan)
-      throws HBaseIOException {
+  private MoveRegionProcedure createMoveRegionProcedure(RegionPlan plan) throws HBaseIOException {
     if (plan.getRegionInfo().getTable().isSystemTable()) {
       List<ServerName> exclude = getExcludedServersForSystemTable();
       if (plan.getDestination() != null && exclude.contains(plan.getDestination())) {
         try {
-          LOG.info("Can not move " + plan.getRegionInfo() + " to " + plan.getDestination()
-              + " because the server is not with highest version");
+          LOG.info("Can not move " + plan.getRegionInfo() + " to " + plan.getDestination() +
+            " because the server is not with highest version");
           plan.setDestination(getBalancer().randomAssignment(plan.getRegionInfo(),
-              this.master.getServerManager().createDestinationServersList(exclude)));
+            this.master.getServerManager().createDestinationServersList(exclude)));
         } catch (HBaseIOException e) {
           LOG.warn(e.toString(), e);
         }
       }
     }
-    return new MoveRegionProcedure(getProcedureEnvironment(), plan);
+    return new MoveRegionProcedure(getProcedureEnvironment(), plan, true);
   }
 
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/7b716c96/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java
index 6fb73cd..139d41d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java
@@ -56,11 +56,14 @@ public class MoveRegionProcedure extends AbstractStateMachineRegionProcedure<Mov
   }
 
   /**
-   * @throws IOException If the cluster is offline or master is stopping or if table is disabled
-   *   or non-existent.
+   * @param check whether we should do some checks in the constructor. We will skip the checks if we
+   *          are reopening a region as this may fail the whole procedure and cause stuck. We will
+   *          do the check later when actually executing the procedure so not a big problem.
+   * @throws IOException If the cluster is offline or master is stopping or if table is disabled or
+   *           non-existent.
    */
-  public MoveRegionProcedure(final MasterProcedureEnv env, final RegionPlan plan)
-  throws HBaseIOException {
+  public MoveRegionProcedure(MasterProcedureEnv env, RegionPlan plan, boolean check)
+      throws HBaseIOException {
     super(env, plan.getRegionInfo());
     this.plan = plan;
     preflightChecks(env, true);
@@ -70,9 +73,7 @@ public class MoveRegionProcedure extends AbstractStateMachineRegionProcedure<Mov
   @Override
   protected Flow executeFromState(final MasterProcedureEnv env, final MoveRegionState state)
       throws InterruptedException {
-    if (LOG.isTraceEnabled()) {
-      LOG.trace(this + " execute state=" + state);
-    }
+    LOG.trace("{} execute state={}", this, state);
     switch (state) {
       case MOVE_REGION_PREPARE:
         // Check context again and that region is online; do it here after we have lock on region.

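The new boolean lets callers that only want a reopen skip the constructor-time checks, since
a transient failure there would otherwise fail the whole parent procedure; the checks still
apply when the move actually executes. A reopen is then simply a move whose source and
destination are the same server, which is what ReopenTableRegionsProcedure#createReopenProcedure
does later in this patch. A minimal sketch of that calling pattern (the wrapper method name is
an assumption, the APIs are the ones shown in this commit):

    // Assumed wrapper; mirrors createReopenProcedure further down in this patch.
    MoveRegionProcedure reopenInPlace(MasterProcedureEnv env, HRegionLocation loc)
        throws HBaseIOException {
      // Same source and destination: the region is closed and reopened where it already is,
      // and check=false defers table-state validation to execution time.
      RegionPlan samePlace = new RegionPlan(loc.getRegion(), loc.getServerName(), loc.getServerName());
      return new MoveRegionProcedure(env, samePlace, false);
    }
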
http://git-wip-us.apache.org/repos/asf/hbase/blob/7b716c96/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java
index 2124d84..de9c4fd 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java
@@ -116,11 +116,13 @@ public class RegionStateStore {
 
       final ServerName lastHost = hrl.getServerName();
       final ServerName regionLocation = getRegionServer(result, replicaId);
-      final long openSeqNum = -1;
+      final long openSeqNum = hrl.getSeqNum();
 
       // TODO: move under trace, now is visible for debugging
-      LOG.info("Load hbase:meta entry region={}, regionState={}, lastHost={}, " +
-          "regionLocation={}", regionInfo.getEncodedName(), state, lastHost, regionLocation);
+      LOG.info(
+        "Load hbase:meta entry region={}, regionState={}, lastHost={}, " +
+          "regionLocation={}, openSeqNum={}",
+        regionInfo.getEncodedName(), state, lastHost, regionLocation, openSeqNum);
       visitor.visitRegionState(result, regionInfo, state, regionLocation, lastHost, openSeqNum);
     }
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/7b716c96/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
index d4951f7..15a2fbc 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
@@ -35,8 +35,9 @@ import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentSkipListMap;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.Predicate;
-
+import java.util.stream.Collectors;
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.RegionInfo;
@@ -49,6 +50,7 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+
 import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
 
 /**
@@ -571,46 +573,115 @@ public class RegionStates {
   }
 
   /**
-   * @return Returns regions for a table which are open or about to be open (OPEN or OPENING)
+   * @return Return online regions of table; does not include OFFLINE or SPLITTING regions.
    */
-  public List<RegionInfo> getOpenRegionsOfTable(final TableName table) {
-    // We want to get regions which are already open on the cluster or are about to be open.
-    // The use-case is for identifying regions which need to be re-opened to ensure they see some
-    // new configuration. Regions in OPENING now are presently being opened by a RS, so we can
-    // assume that they will imminently be OPEN but may not see our configuration change
-    return getRegionsOfTable(
-        table, (state) -> state.isInState(State.OPEN) || state.isInState(State.OPENING));
+  public List<RegionInfo> getRegionsOfTable(final TableName table) {
+    return getRegionsOfTable(table, false);
+  }
+
+  private HRegionLocation createRegionForReopen(RegionStateNode node) {
+    synchronized (node) {
+      if (!include(node, false)) {
+        return null;
+      }
+      if (node.isInState(State.OPEN)) {
+        return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(),
+          node.getOpenSeqNum());
+      } else if (node.isInState(State.OPENING)) {
+        return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), -1);
+      } else {
+        return null;
+      }
+    }
   }
 
   /**
-   * @return Return online regions of table; does not include OFFLINE or SPLITTING regions.
+   * Get the regions to be reopened when modifying a table.
+   * <p/>
+   * Notice that the {@code openSeqNum} in the returned HRegionLocation is also used to indicate the
+   * state of this region, positive means the region is in {@link State#OPEN}, -1 means
+   * {@link State#OPENING}. And for regions in other states we do not need reopen them.
    */
-  public List<RegionInfo> getRegionsOfTable(final TableName table) {
-    return getRegionsOfTable(table, false);
+  public List<HRegionLocation> getRegionsOfTableForReopen(TableName tableName) {
+    return getTableRegionStateNodes(tableName).stream().map(this::createRegionForReopen)
+      .filter(r -> r != null).collect(Collectors.toList());
+  }
+
+  /**
+   * Check whether the region has been reopened. The meaning of the {@link HRegionLocation} is the
+   * same with {@link #getRegionsOfTableForReopen(TableName)}.
+   * <p/>
+   * For a region which is in {@link State#OPEN} before, if the region state is changed or the open
+   * seq num is changed, we can confirm that it has been reopened.
+   * <p/>
+   * For a region which is in {@link State#OPENING} before, usually it will be in {@link State#OPEN}
+   * now and we will schedule a MRP to reopen it. But there are several exceptions:
+   * <ul>
+   * <li>The region is in state other than {@link State#OPEN} or {@link State#OPENING}.</li>
+   * <li>The location of the region has been changed</li>
+   * </ul>
+   * Of course the region could still be in {@link State#OPENING} state and still on the same
+   * server, then here we will still return a {@link HRegionLocation} for it, just like
+   * {@link #getRegionsOfTableForReopen(TableName)}.
+   * @param oldLoc the previous state/location of this region
+   * @return null if the region has been reopened, otherwise a new {@link HRegionLocation} which
+   *         means we still need to reopen the region.
+   * @see #getRegionsOfTableForReopen(TableName)
+   */
+  public HRegionLocation checkReopened(HRegionLocation oldLoc) {
+    RegionStateNode node = getRegionStateNode(oldLoc.getRegion());
+    synchronized (node) {
+      if (oldLoc.getSeqNum() >= 0) {
+        // in OPEN state before
+        if (node.isInState(State.OPEN)) {
+          if (node.getOpenSeqNum() > oldLoc.getSeqNum()) {
+            // normal case, the region has been reopened
+            return null;
+          } else {
+            // the open seq num does not change, need to reopen again
+            return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(),
+              node.getOpenSeqNum());
+          }
+        } else {
+          // the state has been changed so we can make sure that the region has been reopened(not
+          // finished maybe, but not a problem).
+          return null;
+        }
+      } else {
+        // in OPENING state before
+        if (!node.isInState(State.OPEN, State.OPENING)) {
+          // not in OPEN or OPENING state, then we can make sure that the region has been
+          // reopened(not finished maybe, but not a problem)
+          return null;
+        } else {
+          if (!node.getRegionLocation().equals(oldLoc.getServerName())) {
+            // the region has been moved, so we can make sure that the region has been reopened.
+            return null;
+          }
+          // normal case, we are still in OPENING state, or the reopen has been opened and the state
+          // is changed to OPEN.
+          long openSeqNum = node.isInState(State.OPEN) ? node.getOpenSeqNum() : -1;
+          return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), openSeqNum);
+        }
+      }
+    }
   }
 
   /**
   * @return Return online regions of table; does not include OFFLINE or SPLITTING regions.
   */
-  public List<RegionInfo> getRegionsOfTable(final TableName table, boolean offline) {
-    return getRegionsOfTable(table, (state) -> include(state, offline));
+  public List<RegionInfo> getRegionsOfTable(TableName table, boolean offline) {
+    return getRegionsOfTable(table, state -> include(state, offline));
   }
 
   /**
   * @return Return the regions of the table; does not include OFFLINE unless you set
-   * <code>offline</code> to true. Does not include regions that are in the
-   * {@link State#SPLIT} state.
+   *         <code>offline</code> to true. Does not include regions that are in the
+   *         {@link State#SPLIT} state.
    */
-  public List<RegionInfo> getRegionsOfTable(
-      final TableName table, Predicate<RegionStateNode> filter) {
-    final ArrayList<RegionStateNode> nodes = getTableRegionStateNodes(table);
-    final ArrayList<RegionInfo> hris = new ArrayList<RegionInfo>(nodes.size());
-    for (RegionStateNode node: nodes) {
-      if (filter.test(node)) {
-        hris.add(node.getRegionInfo());
-      }
-    }
-    return hris;
+  private List<RegionInfo> getRegionsOfTable(TableName table, Predicate<RegionStateNode> filter) {
+    return getTableRegionStateNodes(table).stream().filter(filter).map(n -> n.getRegionInfo())
+      .collect(Collectors.toList());
   }
 
   /**

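The decision made by checkReopened boils down to a small predicate on the captured snapshot
versus the current RegionStateNode; the real method additionally synchronizes on the node and
returns an updated HRegionLocation instead of a boolean so the caller can retry with fresh
state. A hedged restatement, purely for illustration:

    // Illustration only: true means the reopen is confirmed, false means keep waiting/retrying.
    static boolean reopenConfirmed(HRegionLocation oldLoc, RegionStateNode node) {
      if (oldLoc.getSeqNum() >= 0) {                       // snapshot taken while OPEN
        return !node.isInState(State.OPEN)                 // state changed -> reopen under way or done
            || node.getOpenSeqNum() > oldLoc.getSeqNum();  // open seq num advanced -> reopened
      }
      // snapshot taken while OPENING
      return !node.isInState(State.OPEN, State.OPENING)    // left OPEN/OPENING -> reopened
          || !node.getRegionLocation().equals(oldLoc.getServerName()); // moved -> reopened
    }
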
http://git-wip-us.apache.org/repos/asf/hbase/blob/7b716c96/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java
index bcf41ac..920c18b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java
@@ -22,7 +22,6 @@ import java.io.IOException;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
-
 import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.HBaseIOException;
 import org.apache.hadoop.hbase.HConstants;
@@ -43,6 +42,7 @@ import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+
 import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.ModifyTableState;
@@ -56,8 +56,6 @@ public class ModifyTableProcedure
   private TableDescriptor modifiedTableDescriptor;
   private boolean deleteColumnFamilyInModify;
 
-  private Boolean traceEnabled = null;
-
   public ModifyTableProcedure() {
     super();
     initilize();
@@ -79,62 +77,57 @@ public class ModifyTableProcedure
 
   private void initilize() {
     this.unmodifiedTableDescriptor = null;
-    this.traceEnabled = null;
     this.deleteColumnFamilyInModify = false;
   }
 
   @Override
   protected Flow executeFromState(final MasterProcedureEnv env, final ModifyTableState state)
       throws InterruptedException {
-    if (isTraceEnabled()) {
-      LOG.trace(this + " execute state=" + state);
-    }
-
+    LOG.trace("{} execute state={}", this, state);
     try {
       switch (state) {
-      case MODIFY_TABLE_PREPARE:
-        prepareModify(env);
-        setNextState(ModifyTableState.MODIFY_TABLE_PRE_OPERATION);
-        break;
-      case MODIFY_TABLE_PRE_OPERATION:
-        preModify(env, state);
-        setNextState(ModifyTableState.MODIFY_TABLE_UPDATE_TABLE_DESCRIPTOR);
-        break;
-      case MODIFY_TABLE_UPDATE_TABLE_DESCRIPTOR:
-        updateTableDescriptor(env);
-        setNextState(ModifyTableState.MODIFY_TABLE_REMOVE_REPLICA_COLUMN);
-        break;
-      case MODIFY_TABLE_REMOVE_REPLICA_COLUMN:
-        updateReplicaColumnsIfNeeded(env, unmodifiedTableDescriptor, modifiedTableDescriptor);
-        if (deleteColumnFamilyInModify) {
-          setNextState(ModifyTableState.MODIFY_TABLE_DELETE_FS_LAYOUT);
-        } else {
+        case MODIFY_TABLE_PREPARE:
+          prepareModify(env);
+          setNextState(ModifyTableState.MODIFY_TABLE_PRE_OPERATION);
+          break;
+        case MODIFY_TABLE_PRE_OPERATION:
+          preModify(env, state);
+          setNextState(ModifyTableState.MODIFY_TABLE_UPDATE_TABLE_DESCRIPTOR);
+          break;
+        case MODIFY_TABLE_UPDATE_TABLE_DESCRIPTOR:
+          updateTableDescriptor(env);
+          setNextState(ModifyTableState.MODIFY_TABLE_REMOVE_REPLICA_COLUMN);
+          break;
+        case MODIFY_TABLE_REMOVE_REPLICA_COLUMN:
+          updateReplicaColumnsIfNeeded(env, unmodifiedTableDescriptor, modifiedTableDescriptor);
+          if (deleteColumnFamilyInModify) {
+            setNextState(ModifyTableState.MODIFY_TABLE_DELETE_FS_LAYOUT);
+          } else {
+            setNextState(ModifyTableState.MODIFY_TABLE_POST_OPERATION);
+          }
+          break;
+        case MODIFY_TABLE_DELETE_FS_LAYOUT:
+          deleteFromFs(env, unmodifiedTableDescriptor, modifiedTableDescriptor);
           setNextState(ModifyTableState.MODIFY_TABLE_POST_OPERATION);
-        }
-        break;
-      case MODIFY_TABLE_DELETE_FS_LAYOUT:
-        deleteFromFs(env, unmodifiedTableDescriptor, modifiedTableDescriptor);
-        setNextState(ModifyTableState.MODIFY_TABLE_POST_OPERATION);
-        break;
-      case MODIFY_TABLE_POST_OPERATION:
-        postModify(env, state);
-        setNextState(ModifyTableState.MODIFY_TABLE_REOPEN_ALL_REGIONS);
-        break;
-      case MODIFY_TABLE_REOPEN_ALL_REGIONS:
-        if (env.getAssignmentManager().isTableEnabled(getTableName())) {
-          addChildProcedure(env.getAssignmentManager()
-            .createReopenProcedures(getOpenRegionInfoList(env)));
-        }
-        return Flow.NO_MORE_STATE;
-      default:
-        throw new UnsupportedOperationException("unhandled state=" + state);
+          break;
+        case MODIFY_TABLE_POST_OPERATION:
+          postModify(env, state);
+          setNextState(ModifyTableState.MODIFY_TABLE_REOPEN_ALL_REGIONS);
+          break;
+        case MODIFY_TABLE_REOPEN_ALL_REGIONS:
+          if (env.getAssignmentManager().isTableEnabled(getTableName())) {
+            addChildProcedure(new ReopenTableRegionsProcedure(getTableName()));
+          }
+          return Flow.NO_MORE_STATE;
+        default:
+          throw new UnsupportedOperationException("unhandled state=" + state);
       }
     } catch (IOException e) {
       if (isRollbackSupported(state)) {
         setFailure("master-modify-table", e);
       } else {
-        LOG.warn("Retriable error trying to modify table=" + getTableName() +
-          " (in state=" + state + ")", e);
+        LOG.warn("Retriable error trying to modify table={} (in state={})", getTableName(), state,
+          e);
       }
     }
     return Flow.HAS_MORE_STATE;
@@ -172,7 +165,7 @@ public class ModifyTableProcedure
 
   @Override
   protected ModifyTableState getState(final int stateId) {
-    return ModifyTableState.valueOf(stateId);
+    return ModifyTableState.forNumber(stateId);
   }
 
   @Override
@@ -294,22 +287,6 @@ public class ModifyTableProcedure
   }
 
   /**
-   * Undo the descriptor change (for rollback)
-   * @param env MasterProcedureEnv
-   * @throws IOException
-   **/
-  private void restoreTableDescriptor(final MasterProcedureEnv env) throws IOException {
-    env.getMasterServices().getTableDescriptors().add(unmodifiedTableDescriptor);
-
-    // delete any new column families from the modifiedTableDescriptor.
-    deleteFromFs(env, modifiedTableDescriptor, unmodifiedTableDescriptor);
-
-    // Make sure regions are opened after table descriptor is updated.
-    //reOpenAllRegionsIfTableIsOnline(env);
-    // TODO: NUKE ROLLBACK!!!!
-  }
-
-  /**
   * Removes from hdfs the families that are not longer present in the new table descriptor.
    * @param env MasterProcedureEnv
    * @throws IOException
@@ -394,18 +371,6 @@ public class ModifyTableProcedure
   }
 
   /**
-   * The procedure could be restarted from a different machine. If the variable is null, we need to
-   * retrieve it.
-   * @return traceEnabled whether the trace is enabled
-   */
-  private Boolean isTraceEnabled() {
-    if (traceEnabled == null) {
-      traceEnabled = LOG.isTraceEnabled();
-    }
-    return traceEnabled;
-  }
-
-  /**
    * Coprocessor Action.
    * @param env MasterProcedureEnv
    * @param state the procedure state
@@ -438,13 +403,4 @@ public class ModifyTableProcedure
   private List<RegionInfo> getRegionInfoList(final MasterProcedureEnv env) throws IOException {
     return env.getAssignmentManager().getRegionStates().getRegionsOfTable(getTableName());
   }
-
-  /**
-   * Fetches all open or soon to be open Regions for a table. Cache the result of this method if
-   * you need to use it multiple times. Be aware that it may change over in between calls to this
-   * procedure.
-   */
-  private List<RegionInfo> getOpenRegionInfoList(final MasterProcedureEnv env) throws IOException {
-    return env.getAssignmentManager().getRegionStates().getOpenRegionsOfTable(getTableName());
-  }
 }

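With this change MODIFY_TABLE_REOPEN_ALL_REGIONS no longer fans out per-region
MoveRegionProcedures itself; it schedules a single ReopenTableRegionsProcedure child, which
keeps retrying until every region is confirmed reopened. Other procedures that change
table-wide settings can reuse it the same way; a sketch of that usage, assuming the caller
already holds the table lock as ModifyTableProcedure does:

    // Sketch of a caller: delegate the reopen-and-confirm loop to the child procedure.
    if (env.getAssignmentManager().isTableEnabled(tableName)) {
      addChildProcedure(new ReopenTableRegionsProcedure(tableName));
    }
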
http://git-wip-us.apache.org/repos/asf/hbase/blob/7b716c96/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
index 133d6f4..7928c5b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
@@ -18,7 +18,14 @@
 package org.apache.hadoop.hbase.master.procedure;
 
 import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+import org.apache.hadoop.hbase.HBaseIOException;
+import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.master.RegionPlan;
+import org.apache.hadoop.hbase.master.assignment.MoveRegionProcedure;
 import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
 import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
 import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
@@ -31,8 +38,9 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.R
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.ReopenTableRegionsStateData;
 
 /**
- * Used for non table procedures to reopen the regions for a table. For example,
- * {@link org.apache.hadoop.hbase.master.replication.ModifyPeerProcedure}.
+ * Used for reopening the regions for a table.
+ * <p/>
+ * Currently we use {@link MoveRegionProcedure} to reopen regions.
  */
 @InterfaceAudience.Private
 public class ReopenTableRegionsProcedure
@@ -42,6 +50,8 @@ public class ReopenTableRegionsProcedure
 
   private TableName tableName;
 
+  private List<HRegionLocation> regions = Collections.emptyList();
+
   public ReopenTableRegionsProcedure() {
   }
 
@@ -59,19 +69,53 @@ public class ReopenTableRegionsProcedure
     return TableOperationType.REGION_EDIT;
   }
 
+  private MoveRegionProcedure createReopenProcedure(MasterProcedureEnv env, HRegionLocation loc) {
+    try {
+      return new MoveRegionProcedure(env,
+        new RegionPlan(loc.getRegion(), loc.getServerName(), loc.getServerName()), false);
+    } catch (HBaseIOException e) {
+      // we skip the checks so this should not happen
+      throw new AssertionError(e);
+    }
+  }
+
   @Override
   protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState state)
       throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
     switch (state) {
-      case REOPEN_TABLE_REGIONS_REOPEN_ALL_REGIONS:
-        try {
-          addChildProcedure(env.getAssignmentManager().createReopenProcedures(
-            env.getAssignmentManager().getRegionStates().getRegionsOfTable(tableName)));
-        } catch (IOException e) {
-          LOG.warn("Failed to schedule reopen procedures for {}", tableName, e);
-          throw new ProcedureSuspendedException();
+      case REOPEN_TABLE_REGIONS_GET_REGIONS:
+        if (!env.getAssignmentManager().isTableEnabled(tableName)) {
+          LOG.info("Table {} is disabled, give up reopening its regions");
+          return Flow.NO_MORE_STATE;
+        }
+        regions =
+          env.getAssignmentManager().getRegionStates().getRegionsOfTableForReopen(tableName);
+        setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
+        return Flow.HAS_MORE_STATE;
+      case REOPEN_TABLE_REGIONS_REOPEN_REGIONS:
+        addChildProcedure(regions.stream().filter(l -> l.getSeqNum() >= 0)
+          .map(l -> createReopenProcedure(env, l)).toArray(MoveRegionProcedure[]::new));
+        setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_CONFIRM_REOPENED);
+        return Flow.HAS_MORE_STATE;
+      case REOPEN_TABLE_REGIONS_CONFIRM_REOPENED:
+        regions = regions.stream().map(env.getAssignmentManager().getRegionStates()::checkReopened)
+          .filter(l -> l != null).collect(Collectors.toList());
+        if (regions.isEmpty()) {
+          return Flow.NO_MORE_STATE;
+        }
+        if (regions.stream().anyMatch(l -> l.getSeqNum() >= 0)) {
+          setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
+          return Flow.HAS_MORE_STATE;
         }
-        return Flow.NO_MORE_STATE;
+        LOG.info("There are still {} region(s) which need to be reopened for table {} are in " +
+          "OPENING state, try again later", regions.size(), tableName);
+        // All the regions need to reopen are in OPENING state which means we can not schedule any
+        // MRPs. Then sleep for one second, and yield the procedure to let other procedures run
+        // first and hope next time we can get some regions in other state to make progress.
+        // TODO: add a delay for ProcedureYieldException so that we do not need to sleep here which
+        // blocks a procedure worker.
+        Thread.sleep(1000);
+        throw new ProcedureYieldException();
       default:
         throw new UnsupportedOperationException("unhandled state=" + state);
     }
@@ -95,20 +139,24 @@ public class ReopenTableRegionsProcedure
 
   @Override
   protected ReopenTableRegionsState getInitialState() {
-    return ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_ALL_REGIONS;
+    return ReopenTableRegionsState.REOPEN_TABLE_REGIONS_GET_REGIONS;
   }
 
   @Override
   protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
     super.serializeStateData(serializer);
-    serializer.serialize(ReopenTableRegionsStateData.newBuilder()
-      .setTableName(ProtobufUtil.toProtoTableName(tableName)).build());
+    ReopenTableRegionsStateData.Builder builder = ReopenTableRegionsStateData.newBuilder()
+      .setTableName(ProtobufUtil.toProtoTableName(tableName));
+    regions.stream().map(ProtobufUtil::toRegionLocation).forEachOrdered(builder::addRegion);
+    serializer.serialize(builder.build());
   }
 
   @Override
   protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
     super.deserializeStateData(serializer);
-    tableName = ProtobufUtil
-      .toTableName(serializer.deserialize(ReopenTableRegionsStateData.class).getTableName());
+    ReopenTableRegionsStateData data = serializer.deserialize(ReopenTableRegionsStateData.class);
+    tableName = ProtobufUtil.toTableName(data.getTableName());
+    regions = data.getRegionList().stream().map(ProtobufUtil::toRegionLocation)
+      .collect(Collectors.toList());
   }
 }
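
Taken together: GET_REGIONS snapshots (region, server, openSeqNum) for every OPEN or OPENING
region, REOPEN_REGIONS schedules a same-server MoveRegionProcedure for each snapshot that was
OPEN, and CONFIRM_REOPENED re-checks the snapshots through RegionStates#checkReopened, looping
(and backing off when only OPENING regions remain) until the list drains. A test-style usage
sketch, assuming access to a running master and the procedure test utilities:

    // Assumed test context: "master" is the active HMaster of a mini cluster.
    ProcedureExecutor<MasterProcedureEnv> exec = master.getMasterProcedureExecutor();
    long procId = exec.submitProcedure(new ReopenTableRegionsProcedure(tableName));
    // Completion now really means every region was reopened, not merely that reopens were scheduled.
    ProcedureTestingUtility.waitProcedure(exec, procId);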
