Repository: hbase
Updated Branches:
  refs/heads/master f6a017ce6 -> 5b850caa8


HBASE-12791 HBase does not attempt to clean up an aborted split when the
regionserver is shutting down (Rajeshbabu)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/5b850caa
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/5b850caa
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/5b850caa

Branch: refs/heads/master
Commit: 5b850caa801d033d800bf6c1bea32aa2c7bda273
Parents: f6a017c
Author: Rajeshbabu Chintaguntla <[email protected]>
Authored: Mon Jan 12 01:04:26 2015 +0530
Committer: Rajeshbabu Chintaguntla <[email protected]>
Committed: Mon Jan 12 01:04:26 2015 +0530

----------------------------------------------------------------------
 .../hadoop/hbase/master/RegionStates.java       | 21 ++++++--
 .../org/apache/hadoop/hbase/util/FSUtils.java   | 15 ++++++
 .../org/apache/hadoop/hbase/util/HBaseFsck.java | 44 +++++++++++++--
 .../TestSplitTransactionOnCluster.java          | 40 ++++++++++++++
 .../apache/hadoop/hbase/util/TestHBaseFsck.java | 57 ++++++++++++++++++++
 5 files changed, 169 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/5b850caa/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
index 5a9344b..d4bd9a4 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
@@ -31,6 +31,7 @@ import java.util.TreeMap;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
@@ -46,6 +47,7 @@ import org.apache.hadoop.hbase.client.RegionReplicaUtil;
 import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.client.TableState;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.Pair;
 
 /**
@@ -568,10 +570,6 @@ public class RegionStates {
       }
     }
 
-    for (HRegionInfo hri : regionsToOffline) {
-      regionOffline(hri);
-    }
-
     for (RegionState state : regionsInTransition.values()) {
       HRegionInfo hri = state.getRegion();
       if (assignedRegions.contains(hri)) {
@@ -591,12 +589,27 @@ public class RegionStates {
             State.FAILED_OPEN, State.FAILED_CLOSE, State.OFFLINE)) {
           LOG.info("Found region in " + state + " to be reassigned by SSH for 
" + sn);
           rits.add(hri);
+        } else if (isOneOfStates(state, State.SPLITTING_NEW)) {
+          try {
+            if (MetaTableAccessor.getRegion(server.getConnection(), 
state.getRegion()
+                .getEncodedNameAsBytes()) == null) {
+              regionsToOffline.add(state.getRegion());
+              FSUtils.deleteRegionDir(server.getConfiguration(), 
state.getRegion());
+            }
+          } catch (IOException e) {
+            LOG.warn("Got exception while deleting " + state.getRegion()
+                + " directories from file system.", e);
+          }
         } else {
           LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
         }
       }
     }
 
+    for (HRegionInfo hri : regionsToOffline) {
+      regionOffline(hri);
+    }
+
     this.notifyAll();
     return rits;
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/5b850caa/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
index 50532a1..7cda55d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
@@ -183,6 +183,21 @@ public abstract class FSUtils {
   }
 
   /**
+   * Delete the region directory if exists.
+   * @param conf
+   * @param hri
+   * @return True if deleted the region directory.
+   * @throws IOException
+   */
+  public static boolean deleteRegionDir(final Configuration conf, final 
HRegionInfo hri)
+  throws IOException {
+    Path rootDir = getRootDir(conf);
+    FileSystem fs = rootDir.getFileSystem(conf);
+    return deleteDirectory(fs,
+      new Path(getTableDir(rootDir, hri.getTable()), hri.getEncodedName()));
+  }
+
+  /**
    * Return the number of bytes that large input files should be optimally
    * be split into to minimize i/o time.
    *

http://git-wip-us.apache.org/repos/asf/hbase/blob/5b850caa/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
index 94da7ab..a5ac8b8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
@@ -53,7 +53,6 @@ import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 
-
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -85,8 +84,6 @@ import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableDescriptor;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.ClusterConnection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
@@ -134,7 +131,6 @@ import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.zookeeper.KeeperException;
 
-import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
@@ -1996,6 +1992,43 @@ public class HBaseFsck extends Configured implements 
Closeable {
           return;
         }
 
+        HRegionInfo hri = hbi.getHdfsHRI();
+        TableInfo tableInfo = tablesInfo.get(hri.getTable());
+        if (tableInfo.regionsFromMeta.isEmpty()) {
+          for (HbckInfo h : regionInfoMap.values()) {
+            if (h.getTableName().equals(hri.getTable())) {
+              if (h.metaEntry != null) tableInfo.regionsFromMeta
+                  .add((HRegionInfo) h.metaEntry);
+            }
+          }
+          Collections.sort(tableInfo.regionsFromMeta);
+        }
+        for (HRegionInfo region : tableInfo.regionsFromMeta) {
+          if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
+              && (region.getEndKey().length == 0 || 
Bytes.compareTo(region.getEndKey(),
+                hri.getEndKey()) >= 0)
+              && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
+            if(region.isSplit() || region.isOffline()) continue;
+            Path regionDir = hbi.getHdfsRegionDir();
+            FileSystem fs = regionDir.getFileSystem(getConf());
+            List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
+            for (Path familyDir : familyDirs) {
+              List<Path> referenceFilePaths = 
FSUtils.getReferenceFilePaths(fs, familyDir);
+              for (Path referenceFilePath : referenceFilePaths) {
+                Path parentRegionDir =
+                    
StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
+                if 
(parentRegionDir.toString().endsWith(region.getEncodedName())) {
+                  LOG.warn(hri + " start and stop keys are in the range of " + 
region
+                      + ". The region might not be cleaned up from hdfs when 
region " + region
+                      + " split failed. Hence deleting from hdfs.");
+                  HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
+                    regionDir.getParent(), hri);
+                  return;
+                }
+              }
+            }
+          }
+        }
         LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
         int numReplicas = 
admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
         HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), 
hbi.getHdfsHRI(),
@@ -2325,6 +2358,9 @@ public class HBaseFsck extends Configured implements 
Closeable {
     final Multimap<byte[], HbckInfo> overlapGroups =
       TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
 
+    // list of regions derived from meta entries.
+    final List<HRegionInfo> regionsFromMeta = new ArrayList<HRegionInfo>();
+
     TableInfo(TableName name) {
       this.tableName = name;
       deployedOn = new TreeSet <ServerName>();

http://git-wip-us.apache.org/repos/asf/hbase/blob/5b850caa/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index bd66b39..321ad12 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -950,6 +950,46 @@ public class TestSplitTransactionOnCluster {
     }
   }
 
+  @Test (timeout=300000)
+  public void testSSHCleanupDaugtherRegionsOfAbortedSplit() throws Exception {
+    TableName table = 
TableName.valueOf("testSSHCleanupDaugtherRegionsOfAbortedSplit");
+    try {
+      HTableDescriptor desc = new HTableDescriptor(table);
+      desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
+      admin.createTable(desc);
+      HTable hTable = new HTable(cluster.getConfiguration(), 
desc.getTableName());
+      for(int i = 1; i < 5; i++) {
+        Put p1 = new Put(("r"+i).getBytes());
+        p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
+        hTable.put(p1);
+      }
+      admin.flush(desc.getTableName());
+      List<HRegion> regions = cluster.getRegions(desc.getTableName());
+      int serverWith = cluster.getServerWith(regions.get(0).getRegionName());
+      HRegionServer regionServer = cluster.getRegionServer(serverWith);
+      cluster.getServerWith(regions.get(0).getRegionName());
+      SplitTransaction st = new SplitTransaction(regions.get(0), 
Bytes.toBytes("r3"));
+      st.prepare();
+      st.stepsBeforePONR(regionServer, regionServer, false);
+      Path tableDir =
+          
FSUtils.getTableDir(cluster.getMaster().getMasterFileSystem().getRootDir(),
+            desc.getTableName());
+      tableDir.getFileSystem(cluster.getConfiguration());
+      List<Path> regionDirs =
+          
FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), 
tableDir);
+      assertEquals(3,regionDirs.size());
+      AssignmentManager am = cluster.getMaster().getAssignmentManager();
+      am.processServerShutdown(regionServer.getServerName());
+      assertEquals(am.getRegionStates().getRegionsInTransition().toString(), 
0, am
+          .getRegionStates().getRegionsInTransition().size());
+      regionDirs =
+          
FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), 
tableDir);
+      assertEquals(1,regionDirs.size());
+    } finally {
+      TESTING_UTIL.deleteTable(table);
+    }
+  }
+
   private void testSplitBeforeSettingSplittingInZKInternals() throws Exception 
{
     final TableName tableName = 
TableName.valueOf("testSplitBeforeSettingSplittingInZK");
     try {

http://git-wip-us.apache.org/repos/asf/hbase/blob/5b850caa/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
index 4fa78f4..33bd337 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
@@ -88,6 +88,7 @@ import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.io.hfile.TestHFile;
 import org.apache.hadoop.hbase.master.AssignmentManager;
 import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.RegionState;
 import org.apache.hadoop.hbase.master.RegionStates;
 import org.apache.hadoop.hbase.master.TableLockManager;
 import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
@@ -96,6 +97,7 @@ import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.regionserver.SplitTransaction;
 import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.testclassification.MiscTests;
@@ -1173,6 +1175,61 @@ public class TestHBaseFsck {
     }
   }
 
+  @Test (timeout=180000)
+  public void testCleanUpDaughtersNotInMetaAfterFailedSplit() throws Exception 
{
+    TableName table = 
TableName.valueOf("testCleanUpDaughtersNotInMetaAfterFailedSplit");
+    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+    try {
+      HTableDescriptor desc = new HTableDescriptor(table);
+      desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
+      admin.createTable(desc);
+      tbl = new HTable(cluster.getConfiguration(), desc.getTableName());
+      for (int i = 0; i < 5; i++) {
+        Put p1 = new Put(("r" + i).getBytes());
+        p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
+        tbl.put(p1);
+      }
+      admin.flush(desc.getTableName());
+      List<HRegion> regions = cluster.getRegions(desc.getTableName());
+      int serverWith = cluster.getServerWith(regions.get(0).getRegionName());
+      HRegionServer regionServer = cluster.getRegionServer(serverWith);
+      cluster.getServerWith(regions.get(0).getRegionName());
+      SplitTransaction st = new SplitTransaction(regions.get(0), 
Bytes.toBytes("r3"));
+      st.prepare();
+      st.stepsBeforePONR(regionServer, regionServer, false);
+      AssignmentManager am = cluster.getMaster().getAssignmentManager();
+      Map<String, RegionState> regionsInTransition = 
am.getRegionStates().getRegionsInTransition();
+      for (RegionState state : regionsInTransition.values()) {
+        am.regionOffline(state.getRegion());
+      }
+      Map<HRegionInfo, ServerName> regionsMap = new HashMap<HRegionInfo, 
ServerName>();
+      regionsMap.put(regions.get(0).getRegionInfo(), 
regionServer.getServerName());
+      am.assign(regionsMap);
+      am.waitForAssignment(regions.get(0).getRegionInfo());
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+          ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
+      // holes are separate from overlap groups
+      assertEquals(0, hbck.getOverlapGroups(table).size());
+
+      // fix hole
+      assertErrors(
+        doFsck(conf, false, true, false, false, false, false, false, false, 
false, false, null),
+        new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+          ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
+
+      // check that hole fixed
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(5, countRows());
+    } finally {
+      if (tbl != null) {
+        tbl.close();
+        tbl = null;
+      }
+      cleanupTable(table);
+    }
+  }
+
   /**
    * This creates fixes a bad table with a hole in meta.
    */

Reply via email to