Repository: hbase

Updated Branches:
  refs/heads/master f6a017ce6 -> 5b850caa8
HBASE-12791 HBase does not attempt to clean up an aborted split when the regionserver is shutting down (Rajeshbabu)

Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/5b850caa
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/5b850caa
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/5b850caa

Branch: refs/heads/master
Commit: 5b850caa801d033d800bf6c1bea32aa2c7bda273
Parents: f6a017c
Author: Rajeshbabu Chintaguntla <[email protected]>
Authored: Mon Jan 12 01:04:26 2015 +0530
Committer: Rajeshbabu Chintaguntla <[email protected]>
Committed: Mon Jan 12 01:04:26 2015 +0530

----------------------------------------------------------------------
 .../hadoop/hbase/master/RegionStates.java       | 21 ++++++--
 .../org/apache/hadoop/hbase/util/FSUtils.java   | 15 ++++++
 .../org/apache/hadoop/hbase/util/HBaseFsck.java | 44 +++++++++++++--
 .../TestSplitTransactionOnCluster.java          | 40 ++++++++++++++
 .../apache/hadoop/hbase/util/TestHBaseFsck.java | 57 ++++++++++++++++++++
 5 files changed, 169 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/5b850caa/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
index 5a9344b..d4bd9a4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
@@ -31,6 +31,7 @@ import java.util.TreeMap;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
@@ -46,6 +47,7 @@ import org.apache.hadoop.hbase.client.RegionReplicaUtil;
 import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.client.TableState;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.Pair;
 
 /**
@@ -568,10 +570,6 @@ public class RegionStates {
       }
     }
 
-    for (HRegionInfo hri : regionsToOffline) {
-      regionOffline(hri);
-    }
-
     for (RegionState state : regionsInTransition.values()) {
       HRegionInfo hri = state.getRegion();
       if (assignedRegions.contains(hri)) {
@@ -591,12 +589,27 @@ public class RegionStates {
           State.FAILED_OPEN, State.FAILED_CLOSE, State.OFFLINE)) {
         LOG.info("Found region in " + state + " to be reassigned by SSH for " + sn);
         rits.add(hri);
+      } else if (isOneOfStates(state, State.SPLITTING_NEW)) {
+        try {
+          if (MetaTableAccessor.getRegion(server.getConnection(), state.getRegion()
+              .getEncodedNameAsBytes()) == null) {
+            regionsToOffline.add(state.getRegion());
+            FSUtils.deleteRegionDir(server.getConfiguration(), state.getRegion());
+          }
+        } catch (IOException e) {
+          LOG.warn("Got exception while deleting " + state.getRegion()
+              + " directories from file system.", e);
+        }
       } else {
         LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
       }
     }
 
+    for (HRegionInfo hri : regionsToOffline) {
+      regionOffline(hri);
+    }
+
     this.notifyAll();
     return rits;
   }
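Note on the RegionStates hunk above: a split only becomes durable once the
daughter regions are written to hbase:meta (the point of no return, PONR), so
a SPLITTING_NEW region with no row in meta can only be leftover from a split
that aborted before the PONR. The server-shutdown path therefore offlines
such a daughter and deletes its directory. A minimal sketch of that decision,
assuming a live Connection; the class and method names here are illustrative,
not part of the patch:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.util.FSUtils;

public class OrphanDaughterCheck {
  // Illustrative helper, not in the patch: returns true if the daughter's
  // directory was removed from the filesystem.
  static boolean cleanUpIfOrphaned(Connection conn, Configuration conf,
      HRegionInfo daughter) throws IOException {
    // No row in hbase:meta means the split never reached its PONR, so this
    // daughter can never be assigned and its directory is safe to delete.
    if (MetaTableAccessor.getRegion(conn, daughter.getEncodedNameAsBytes()) == null) {
      return FSUtils.deleteRegionDir(conf, daughter); // helper added by this patch
    }
    return false;
  }
}

Note also that the regionOffline() loop moved below the regions-in-transition
scan, so daughters discovered by the new branch are offlined in the same pass.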
http://git-wip-us.apache.org/repos/asf/hbase/blob/5b850caa/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
index 50532a1..7cda55d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
@@ -183,6 +183,21 @@ public abstract class FSUtils {
   }
 
   /**
+   * Delete the region directory if exists.
+   * @param conf
+   * @param hri
+   * @return True if deleted the region directory.
+   * @throws IOException
+   */
+  public static boolean deleteRegionDir(final Configuration conf, final HRegionInfo hri)
+      throws IOException {
+    Path rootDir = getRootDir(conf);
+    FileSystem fs = rootDir.getFileSystem(conf);
+    return deleteDirectory(fs,
+      new Path(getTableDir(rootDir, hri.getTable()), hri.getEncodedName()));
+  }
+
+  /**
    * Return the number of bytes that large input files should be optimally
    * be split into to minimize i/o time.
    *
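The new FSUtils.deleteRegionDir helper resolves the region directory under
the table directory and removes it recursively through the existing
deleteDirectory utility. A usage sketch, with an invented table and region
purely for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.util.FSUtils;

public class DeleteRegionDirExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Invented region on an invented table "t1".
    HRegionInfo hri = new HRegionInfo(TableName.valueOf("t1"));
    // The directory the helper targets: <table dir>/<encoded region name>.
    Path regionDir = new Path(
        FSUtils.getTableDir(FSUtils.getRootDir(conf), hri.getTable()),
        hri.getEncodedName());
    System.out.println("deleteRegionDir would remove: " + regionDir);
    // Returns true only if the directory existed and was deleted.
    boolean deleted = FSUtils.deleteRegionDir(conf, hri);
    System.out.println("deleted = " + deleted);
  }
}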
http://git-wip-us.apache.org/repos/asf/hbase/blob/5b850caa/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
index 94da7ab..a5ac8b8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
@@ -53,7 +53,6 @@ import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 
-
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -85,8 +84,6 @@ import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableDescriptor;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.ClusterConnection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
@@ -134,7 +131,6 @@ import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.zookeeper.KeeperException;
 
-import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
@@ -1996,6 +1992,43 @@ public class HBaseFsck extends Configured implements Closeable {
       return;
     }
 
+    HRegionInfo hri = hbi.getHdfsHRI();
+    TableInfo tableInfo = tablesInfo.get(hri.getTable());
+    if (tableInfo.regionsFromMeta.isEmpty()) {
+      for (HbckInfo h : regionInfoMap.values()) {
+        if (h.getTableName().equals(hri.getTable())) {
+          if (h.metaEntry != null) tableInfo.regionsFromMeta
+              .add((HRegionInfo) h.metaEntry);
+        }
+      }
+      Collections.sort(tableInfo.regionsFromMeta);
+    }
+    for (HRegionInfo region : tableInfo.regionsFromMeta) {
+      if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
+          && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
+            hri.getEndKey()) >= 0)
+          && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
+        if(region.isSplit() || region.isOffline()) continue;
+        Path regionDir = hbi.getHdfsRegionDir();
+        FileSystem fs = regionDir.getFileSystem(getConf());
+        List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
+        for (Path familyDir : familyDirs) {
+          List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
+          for (Path referenceFilePath : referenceFilePaths) {
+            Path parentRegionDir =
+                StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
+            if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
+              LOG.warn(hri + " start and stop keys are in the range of " + region
+                  + ". The region might not be cleaned up from hdfs when region " + region
+                  + " split failed. Hence deleting from hdfs.");
+              HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
+                regionDir.getParent(), hri);
+              return;
+            }
+          }
+        }
+      }
+    }
     LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
     int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
     HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
@@ -2325,6 +2358,9 @@ public class HBaseFsck extends Configured implements Closeable {
     final Multimap<byte[], HbckInfo> overlapGroups =
       TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);
 
+    // list of regions derived from meta entries.
+    final List<HRegionInfo> regionsFromMeta = new ArrayList<HRegionInfo>();
+
     TableInfo(TableName name) {
       this.tableName = name;
       deployedOn = new TreeSet <ServerName>();
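Note on the HBaseFsck hunk above: before patching hbase:meta from a stray
.regioninfo file, hbck now checks whether the orphan HDFS region is really a
daughter left behind by a failed split: its key range must be covered by a
live region already in meta, and its family directories must still hold
reference files pointing back into that covering region. Only when both hold
is the directory deleted instead of being re-added to meta. A sketch of the
reference-file test; referencesParent and its class are invented names:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.util.FSUtils;

public class LeftoverDaughterCheck {
  // Does this orphan region directory hold reference files that point back
  // into the candidate parent region?
  static boolean referencesParent(Configuration conf, Path regionDir,
      HRegionInfo parent) throws IOException {
    FileSystem fs = regionDir.getFileSystem(conf);
    // Split daughters start out with reference files in each family directory.
    for (Path familyDir : FSUtils.getFamilyDirs(fs, regionDir)) {
      for (Path ref : FSUtils.getReferenceFilePaths(fs, familyDir)) {
        // A reference file names the parent store file it points at; two
        // getParent() calls walk up store file -> family dir -> region dir.
        Path parentRegionDir =
            StoreFileInfo.getReferredToFile(ref).getParent().getParent();
        if (parentRegionDir.toString().endsWith(parent.getEncodedName())) {
          return true;
        }
      }
    }
    return false;
  }
}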
http://git-wip-us.apache.org/repos/asf/hbase/blob/5b850caa/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index bd66b39..321ad12 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -950,6 +950,46 @@ public class TestSplitTransactionOnCluster {
     }
   }
 
+  @Test (timeout=300000)
+  public void testSSHCleanupDaugtherRegionsOfAbortedSplit() throws Exception {
+    TableName table = TableName.valueOf("testSSHCleanupDaugtherRegionsOfAbortedSplit");
+    try {
+      HTableDescriptor desc = new HTableDescriptor(table);
+      desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
+      admin.createTable(desc);
+      HTable hTable = new HTable(cluster.getConfiguration(), desc.getTableName());
+      for(int i = 1; i < 5; i++) {
+        Put p1 = new Put(("r"+i).getBytes());
+        p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
+        hTable.put(p1);
+      }
+      admin.flush(desc.getTableName());
+      List<HRegion> regions = cluster.getRegions(desc.getTableName());
+      int serverWith = cluster.getServerWith(regions.get(0).getRegionName());
+      HRegionServer regionServer = cluster.getRegionServer(serverWith);
+      cluster.getServerWith(regions.get(0).getRegionName());
+      SplitTransaction st = new SplitTransaction(regions.get(0), Bytes.toBytes("r3"));
+      st.prepare();
+      st.stepsBeforePONR(regionServer, regionServer, false);
+      Path tableDir =
+          FSUtils.getTableDir(cluster.getMaster().getMasterFileSystem().getRootDir(),
+            desc.getTableName());
+      tableDir.getFileSystem(cluster.getConfiguration());
+      List<Path> regionDirs =
+          FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
+      assertEquals(3,regionDirs.size());
+      AssignmentManager am = cluster.getMaster().getAssignmentManager();
+      am.processServerShutdown(regionServer.getServerName());
+      assertEquals(am.getRegionStates().getRegionsInTransition().toString(), 0, am
+          .getRegionStates().getRegionsInTransition().size());
+      regionDirs =
+          FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
+      assertEquals(1,regionDirs.size());
+    } finally {
+      TESTING_UTIL.deleteTable(table);
+    }
+  }
+
   private void testSplitBeforeSettingSplittingInZKInternals() throws Exception {
     final TableName tableName = TableName.valueOf("testSplitBeforeSettingSplittingInZK");
     try {
http://git-wip-us.apache.org/repos/asf/hbase/blob/5b850caa/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
index 4fa78f4..33bd337 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
@@ -88,6 +88,7 @@ import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.io.hfile.TestHFile;
 import org.apache.hadoop.hbase.master.AssignmentManager;
 import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.RegionState;
 import org.apache.hadoop.hbase.master.RegionStates;
 import org.apache.hadoop.hbase.master.TableLockManager;
 import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
@@ -96,6 +97,7 @@ import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.regionserver.SplitTransaction;
 import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.testclassification.MiscTests;
@@ -1173,6 +1175,61 @@ public class TestHBaseFsck {
     }
   }
 
+  @Test (timeout=180000)
+  public void testCleanUpDaughtersNotInMetaAfterFailedSplit() throws Exception {
+    TableName table = TableName.valueOf("testCleanUpDaughtersNotInMetaAfterFailedSplit");
+    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+    try {
+      HTableDescriptor desc = new HTableDescriptor(table);
+      desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
+      admin.createTable(desc);
+      tbl = new HTable(cluster.getConfiguration(), desc.getTableName());
+      for (int i = 0; i < 5; i++) {
+        Put p1 = new Put(("r" + i).getBytes());
+        p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
+        tbl.put(p1);
+      }
+      admin.flush(desc.getTableName());
+      List<HRegion> regions = cluster.getRegions(desc.getTableName());
+      int serverWith = cluster.getServerWith(regions.get(0).getRegionName());
+      HRegionServer regionServer = cluster.getRegionServer(serverWith);
+      cluster.getServerWith(regions.get(0).getRegionName());
+      SplitTransaction st = new SplitTransaction(regions.get(0), Bytes.toBytes("r3"));
+      st.prepare();
+      st.stepsBeforePONR(regionServer, regionServer, false);
+      AssignmentManager am = cluster.getMaster().getAssignmentManager();
+      Map<String, RegionState> regionsInTransition = am.getRegionStates().getRegionsInTransition();
+      for (RegionState state : regionsInTransition.values()) {
+        am.regionOffline(state.getRegion());
+      }
+      Map<HRegionInfo, ServerName> regionsMap = new HashMap<HRegionInfo, ServerName>();
+      regionsMap.put(regions.get(0).getRegionInfo(), regionServer.getServerName());
+      am.assign(regionsMap);
+      am.waitForAssignment(regions.get(0).getRegionInfo());
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+          ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
+      // holes are separate from overlap groups
+      assertEquals(0, hbck.getOverlapGroups(table).size());
+
+      // fix hole
+      assertErrors(
+        doFsck(conf, false, true, false, false, false, false, false, false, false, false, null),
+        new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+            ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
+
+      // check that hole fixed
+      assertNoErrors(doFsck(conf, false));
+      assertEquals(5, countRows());
+    } finally {
+      if (tbl != null) {
+        tbl.close();
+        tbl = null;
+      }
+      cleanupTable(table);
+    }
+  }
+
   /**
    * This creates fixes a bad table with a hole in meta.
    */
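For operators: in the test above, the doFsck(conf, false, true, ...) call is
the programmatic equivalent of running hbck with meta repair enabled
(something like "hbase hbck -fixMeta"; the exact flag mapping of the boolean
arguments is an assumption here, not spelled out in the patch). With this
change, that repair deletes a leftover daughter of a failed split from HDFS
rather than re-inserting it into hbase:meta.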
