Repository: hbase Updated Branches: refs/heads/0.98 81e6831af -> 01039eb2f
HBASE-12791 HBase does not attempt to clean up an aborted split when the regionserver is shutting down (Rajeshbabu) Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/01039eb2 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/01039eb2 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/01039eb2 Branch: refs/heads/0.98 Commit: 01039eb2f5d143ec40e1f6b8dae9d35ff67e55c4 Parents: 81e6831 Author: Rajeshbabu Chintaguntla <[email protected]> Authored: Mon Jan 12 07:10:29 2015 +0530 Committer: Rajeshbabu Chintaguntla <[email protected]> Committed: Mon Jan 12 07:10:29 2015 +0530 ---------------------------------------------------------------------- .../hadoop/hbase/master/RegionStates.java | 19 +++++-- .../org/apache/hadoop/hbase/util/FSUtils.java | 15 +++++ .../org/apache/hadoop/hbase/util/HBaseFsck.java | 41 ++++++++++++++ .../TestSplitTransactionOnCluster.java | 40 +++++++++++++ .../apache/hadoop/hbase/util/TestHBaseFsck.java | 59 ++++++++++++++++++++ 5 files changed, 170 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/01039eb2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java index 1317124..68f3c18 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSUtils; import 
org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.zookeeper.ZKAssign; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; @@ -505,10 +506,6 @@ public class RegionStates { } } - for (HRegionInfo hri : regionsToOffline) { - regionOffline(hri); - } - for (RegionState state : regionsInTransition.values()) { HRegionInfo hri = state.getRegion(); if (assignedRegions.contains(hri)) { @@ -527,12 +524,26 @@ public class RegionStates { if (state.isPendingOpenOrOpening() || state.isFailedClose() || state.isOffline()) { LOG.info("Found region in " + state + " to be reassigned by SSH for " + sn); rits.add(hri); + } else if(state.isSplittingNew()) { + try { + if (MetaReader.getRegion(server.getCatalogTracker(), state.getRegion().getRegionName()) == null) { + regionsToOffline.add(state.getRegion()); + FSUtils.deleteRegionDir(server.getConfiguration(), state.getRegion()); + } + } catch (IOException e) { + LOG.warn("Got exception while deleting " + state.getRegion() + + " directories from file system.", e); + } } else { LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state); } } } + for (HRegionInfo hri : regionsToOffline) { + regionOffline(hri); + } + this.notifyAll(); return rits; } http://git-wip-us.apache.org/repos/asf/hbase/blob/01039eb2/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java index 81c7679..d72634e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java @@ -183,6 +183,21 @@ public abstract class FSUtils { } /** + * Delete the region directory if exists. + * @param conf + * @param hri + * @return True if deleted the region directory. 
+ * @throws IOException + */ + public static boolean deleteRegionDir(final Configuration conf, final HRegionInfo hri) + throws IOException { + Path rootDir = getRootDir(conf); + FileSystem fs = rootDir.getFileSystem(conf); + return deleteDirectory(fs, + new Path(getTableDir(rootDir, hri.getTable()), hri.getEncodedName())); + } + + /** * Return the number of bytes that large input files should be optimally * be split into to minimize i/o time. * http://git-wip-us.apache.org/repos/asf/hbase/blob/01039eb2/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java index c01864c..719175a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java @@ -1895,6 +1895,44 @@ public class HBaseFsck extends Configured { return; } + HRegionInfo hri = hbi.getHdfsHRI(); + TableInfo tableInfo = tablesInfo.get(hri.getTable()); + if (tableInfo.regionsFromMeta.isEmpty()) { + for (HbckInfo h : regionInfoMap.values()) { + if (h.getTableName().equals(hri.getTable())) { + if (h.metaEntry != null) tableInfo.regionsFromMeta + .add((HRegionInfo) h.metaEntry); + } + } + Collections.sort(tableInfo.regionsFromMeta); + } + for (HRegionInfo region : tableInfo.regionsFromMeta) { + if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0 + && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(), + hri.getEndKey()) >= 0) + && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) { + if(region.isSplit() || region.isOffline()) continue; + Path regionDir = hbi.getHdfsRegionDir(); + FileSystem fs = regionDir.getFileSystem(getConf()); + List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir); + for (Path familyDir : familyDirs) { + 
List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir); + for (Path referenceFilePath : referenceFilePaths) { + Path parentRegionDir = + StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent(); + if (parentRegionDir.toString().endsWith(region.getEncodedName())) { + LOG.warn(hri + " start and stop keys are in the range of " + region + + ". The region might not be cleaned up from hdfs when region " + region + + " split failed. Hence deleting from hdfs."); + HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs, + regionDir.getParent(), hri); + return; + } + } + } + } + } + LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI()); HBaseFsckRepair.fixMetaHoleOnline(getConf(), hbi.getHdfsHRI()); @@ -2214,6 +2252,9 @@ public class HBaseFsck extends Configured { final Multimap<byte[], HbckInfo> overlapGroups = TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp); + // list of regions derived from meta entries. + final List<HRegionInfo> regionsFromMeta = new ArrayList<HRegionInfo>(); + TableInfo(TableName name) { this.tableName = name; deployedOn = new TreeSet <ServerName>(); http://git-wip-us.apache.org/repos/asf/hbase/blob/01039eb2/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java index 2f463a1..9e74314 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java @@ -1133,6 +1133,46 @@ public class TestSplitTransactionOnCluster { } } + @Test (timeout=300000) + public void testSSHCleanupDaugtherRegionsOfAbortedSplit() 
throws Exception { + TableName table = TableName.valueOf("testSSHCleanupDaugtherRegionsOfAbortedSplit"); + try { + HTableDescriptor desc = new HTableDescriptor(table); + desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f"))); + admin.createTable(desc); + HTable hTable = new HTable(cluster.getConfiguration(), desc.getTableName()); + for(int i = 1; i < 5; i++) { + Put p1 = new Put(("r"+i).getBytes()); + p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes()); + hTable.put(p1); + } + admin.flush(desc.getTableName().toString()); + List<HRegion> regions = cluster.getRegions(desc.getTableName()); + int serverWith = cluster.getServerWith(regions.get(0).getRegionName()); + HRegionServer regionServer = cluster.getRegionServer(serverWith); + cluster.getServerWith(regions.get(0).getRegionName()); + SplitTransaction st = new SplitTransaction(regions.get(0), Bytes.toBytes("r3")); + st.prepare(); + st.stepsBeforePONR(regionServer, regionServer, false); + Path tableDir = + FSUtils.getTableDir(cluster.getMaster().getMasterFileSystem().getRootDir(), + desc.getTableName()); + tableDir.getFileSystem(cluster.getConfiguration()); + List<Path> regionDirs = + FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir); + assertEquals(3,regionDirs.size()); + AssignmentManager am = cluster.getMaster().getAssignmentManager(); + am.processServerShutdown(regionServer.getServerName()); + assertEquals(am.getRegionStates().getRegionsInTransition().toString(), am.getRegionStates() + .getRegionsInTransition().size(), 0); + regionDirs = + FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir); + assertEquals(1,regionDirs.size()); + } finally { + TESTING_UTIL.deleteTable(table); + } + } + public static class MockedSplitTransaction extends SplitTransaction { private HRegion currentRegion; http://git-wip-us.apache.org/repos/asf/hbase/blob/01039eb2/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java 
---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java index 2744531..b34b3b6 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java @@ -81,6 +81,7 @@ import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.hfile.TestHFile; import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.master.RegionStates; import org.apache.hadoop.hbase.master.TableLockManager; import org.apache.hadoop.hbase.master.TableLockManager.TableLock; @@ -89,6 +90,7 @@ import org.apache.hadoop.hbase.protobuf.generated.AdminProtos; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.regionserver.SplitTransaction; import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction; import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter; import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE; @@ -98,6 +100,7 @@ import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo; import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker; import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil; import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker; +import org.apache.hadoop.hbase.zookeeper.ZKAssign; import org.apache.zookeeper.KeeperException; import org.junit.AfterClass; import org.junit.Assert; @@ -2363,4 +2366,60 @@ public class TestHBaseFsck { Assert.assertEquals("shouldIgnorePreCheckPermission", true, hbck.shouldIgnorePreCheckPermission()); } + + @Test (timeout=180000) + public 
void testCleanUpDaughtersNotInMetaAfterFailedSplit() throws Exception { + TableName table = TableName.valueOf("testCleanUpDaughtersNotInMetaAfterFailedSplit"); + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + try { + HTableDescriptor desc = new HTableDescriptor(table); + desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f"))); + TEST_UTIL.getHBaseAdmin().createTable(desc); + tbl = new HTable(cluster.getConfiguration(), desc.getTableName()); + for (int i = 0; i < 5; i++) { + Put p1 = new Put(("r" + i).getBytes()); + p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes()); + tbl.put(p1); + } + TEST_UTIL.getHBaseAdmin().flush(desc.getTableName().toString()); + List<HRegion> regions = cluster.getRegions(desc.getTableName()); + int serverWith = cluster.getServerWith(regions.get(0).getRegionName()); + HRegionServer regionServer = cluster.getRegionServer(serverWith); + cluster.getServerWith(regions.get(0).getRegionName()); + SplitTransaction st = new SplitTransaction(regions.get(0), Bytes.toBytes("r3")); + st.prepare(); + st.stepsBeforePONR(regionServer, regionServer, false); + AssignmentManager am = cluster.getMaster().getAssignmentManager(); + Map<String, RegionState> regionsInTransition = am.getRegionStates().getRegionsInTransition(); + for (RegionState state : regionsInTransition.values()) { + am.regionOffline(state.getRegion()); + } + ZKAssign.deleteNodeFailSilent(regionServer.getZooKeeper(), regions.get(0).getRegionInfo()); + Map<HRegionInfo, ServerName> regionsMap = new HashMap<HRegionInfo, ServerName>(); + regionsMap.put(regions.get(0).getRegionInfo(), regionServer.getServerName()); + am.assign(regionsMap); + am.waitForAssignment(regions.get(0).getRegionInfo()); + HBaseFsck hbck = doFsck(conf, false); + assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, + ERROR_CODE.NOT_IN_META_OR_DEPLOYED }); + // holes are separate from overlap groups + assertEquals(0, hbck.getOverlapGroups(table).size()); + + // fix hole + assertErrors( + 
doFsck(conf, false, true, false, false, false, false, false, false, false, false, null), + new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, + ERROR_CODE.NOT_IN_META_OR_DEPLOYED }); + + // check that hole fixed + assertNoErrors(doFsck(conf, false)); + assertEquals(5, countRows()); + } finally { + if (tbl != null) { + tbl.close(); + tbl = null; + } + deleteTable(table); + } + } }
