HBASE-20940 HStore.cansplit should not allow split to happen if it has references (Vishal Khandelwal)
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/56c59f11 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/56c59f11 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/56c59f11 Branch: refs/heads/branch-1 Commit: 56c59f11e38e24e232cb2f1c126c250edd700366 Parents: 8716ac2 Author: Andrew Purtell <[email protected]> Authored: Fri Aug 17 15:01:44 2018 -0700 Committer: Andrew Purtell <[email protected]> Committed: Fri Aug 17 15:30:40 2018 -0700 ---------------------------------------------------------------------- .../hadoop/hbase/regionserver/HStore.java | 12 +- .../hbase/io/encoding/TestChangingEncoding.java | 4 + .../hbase/master/TestAssignmentListener.java | 21 +++- .../TestEndToEndSplitTransaction.java | 123 +++++++++++++++++-- .../hadoop/hbase/regionserver/TestHRegion.java | 8 ++ .../TestSplitTransactionOnCluster.java | 15 ++- 6 files changed, 168 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/56c59f11/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java index a017820..6ce41e3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -1636,7 +1636,17 @@ public class HStore implements Store { @Override public boolean hasReferences() { - return StoreUtils.hasReferences(this.storeEngine.getStoreFileManager().getStorefiles()); + List<StoreFile> reloadedStoreFiles = null; + try { + // Reloading the store files from file system due to HBASE-20940. 
As split can happen with an + // region which has references + reloadedStoreFiles = loadStoreFiles(); + } catch (IOException ioe) { + LOG.error("Error trying to determine if store has referenes, " + "assuming references exists", + ioe); + return true; + } + return StoreUtils.hasReferences(reloadedStoreFiles); } @Override http://git-wip-us.apache.org/repos/asf/hbase/blob/56c59f11/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java index 621d3f8..d038c5f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java @@ -105,6 +105,10 @@ public class TestChangingEncoding { conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024 * 1024); // ((Log4JLogger)RpcServerImplementation.LOG).getLogger().setLevel(Level.TRACE); // ((Log4JLogger)RpcClient.LOG).getLogger().setLevel(Level.TRACE); + // Disabling split to make sure split does not cause modify column to wait which timesout test + // sometime + conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, + "org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy"); conf.setBoolean("hbase.online.schema.update.enable", true); TEST_UTIL.startMiniCluster(); } http://git-wip-us.apache.org/repos/asf/hbase/blob/56c59f11/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java index bd4251b..12d04ff 100644 --- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.logging.Log; @@ -33,19 +34,21 @@ import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.MiniHBaseCluster; -import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.ServerLoad; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.Region; +import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.JVMClusterUtil; +import org.apache.hadoop.hbase.util.RetryCounter; import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker; import org.apache.hadoop.hbase.zookeeper.RegionServerTracker; import org.apache.hadoop.hbase.zookeeper.ZKUtil; @@ -256,6 +259,20 @@ public class TestAssignmentListener { while (mergeable < 2) { Thread.sleep(100); admin.majorCompact(TABLE_NAME); + + // Waiting for compaction to finish + RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS); + while (CompactionState.NONE != admin.getCompactionState(TABLE_NAME) 
+ && retrier.shouldRetry()) { + retrier.sleepUntilNextRetry(); + } + + // Making sure references are getting cleaned after compaction. + // This is taken care by discharger + for (HRegion region : TEST_UTIL.getHBaseCluster().getRegions(TABLE_NAME)) { + region.getStores().get(0).closeAndArchiveCompactedFiles(); + } + mergeable = 0; for (JVMClusterUtil.RegionServerThread regionThread: miniCluster.getRegionServerThreads()) { for (Region region: regionThread.getRegionServer().getOnlineRegions(TABLE_NAME)) { http://git-wip-us.apache.org/repos/asf/hbase/blob/56c59f11/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java index b133224..86662a4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java @@ -22,17 +22,15 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import com.google.common.collect.Iterators; -import com.google.common.collect.Sets; -import com.google.protobuf.ServiceException; - import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.NavigableMap; import java.util.Random; import java.util.Set; import java.util.TreeSet; +import java.util.concurrent.TimeUnit; import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; @@ -40,9 +38,11 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ChoreService; import org.apache.hadoop.hbase.HBaseTestingUtility; 
+import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.ScheduledChore; @@ -63,6 +63,7 @@ import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager; import org.apache.hadoop.hbase.ipc.HBaseRpcControllerImpl; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.RequestConverter; +import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState; import org.apache.hadoop.hbase.protobuf.generated.ClientProtos; import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest; import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos; @@ -70,13 +71,20 @@ import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.PairOfSameType; +import org.apache.hadoop.hbase.util.RetryCounter; import org.apache.hadoop.hbase.util.StoppableImplementation; import org.apache.hadoop.hbase.util.Threads; import org.junit.AfterClass; +import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; import org.junit.experimental.categories.Category; +import com.google.common.collect.Iterators; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import com.google.protobuf.ServiceException; + @Category(LargeTests.class) public class TestEndToEndSplitTransaction { private static final Log LOG = LogFactory.getLog(TestEndToEndSplitTransaction.class); @@ -94,6 +102,79 @@ public class TestEndToEndSplitTransaction { TEST_UTIL.shutdownMiniCluster(); } + /* + * This is the test for : HBASE-20940 This test will 
split the region and try to open a reference + * over store file. Once store file has any reference, it makes sure that region can't be split + * @throws Exception + */ + @Test + public void testCanSplitJustAfterASplit() throws Exception { + LOG.info("Starting testCanSplitJustAfterASplit"); + byte[] fam = Bytes.toBytes("cf_split"); + + TableName tableName = TableName.valueOf("CanSplitTable"); + Table source = TEST_UTIL.getConnection().getTable(tableName); + Admin admin = TEST_UTIL.getHBaseAdmin(); + Map<String, StoreFile.Reader> scanner = Maps.newHashMap(); + + try { + HTableDescriptor htd = new HTableDescriptor(tableName); + htd.addFamily(new HColumnDescriptor(fam)); + + admin.createTable(htd); + TEST_UTIL.loadTable(source, fam); + List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(tableName); + regions.get(0).forceSplit(null); + admin.split(tableName); + + while (regions.size() <= 1) { + regions = TEST_UTIL.getHBaseCluster().getRegions(tableName); + // Trying to get reference of the file before region split completes + // this would try to get handle on storefile or it cannot be cleaned + for (HRegion region : regions) { + for (Store store : region.getStores()) { + for (StoreFile file : store.getStorefiles()) { + StoreFile.Reader reader = file.getReader(); + reader.getStoreFileScanner(false, false, false, 0, 0, false); + scanner.put(region.getRegionInfo().getEncodedName(), reader); + LOG.info("Got reference to file = " + file.getPath() + ",for region = " + + region.getRegionInfo().getEncodedName()); + } + } + } + } + + Assert.assertTrue("Regions did not split properly", regions.size() > 1); + Assert.assertTrue("Could not get reference any of the store file", scanner.size() > 1); + + RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS); + while (CompactionState.NONE != admin.getCompactionState(tableName) && retrier.shouldRetry()) { + retrier.sleepUntilNextRetry(); + } + + Assert.assertEquals("Compaction did not complete in 30 secs", 
CompactionState.NONE, + admin.getCompactionState(tableName)); + + for (HRegion region : regions) { + for (Store store : region.getStores()) { + Assert.assertTrue("Contains an open file reference which can be split", + !scanner.containsKey(region.getRegionInfo().getEncodedName()) || !store.canSplit()); + } + } + } finally { + for (StoreFile.Reader s : scanner.values()) { + try { + s.close(true); + } catch (IOException e) { + e.printStackTrace(); + } + } + scanner.clear(); + if (source != null) source.close(); + TEST_UTIL.deleteTableIfAny(tableName); + } + } + @Test public void testMasterOpsWhileSplitting() throws Exception { TableName tableName = TableName.valueOf("TestSplit"); @@ -130,10 +211,10 @@ public class TestEndToEndSplitTransaction { if (split.useZKForAssignment) { server.postOpenDeployTasks(regions.getSecond()); } else { - server.reportRegionStateTransition( - RegionServerStatusProtos.RegionStateTransition.TransitionCode.SPLIT, - region.getRegionInfo(), regions.getFirst().getRegionInfo(), - regions.getSecond().getRegionInfo()); + server.reportRegionStateTransition( + RegionServerStatusProtos.RegionStateTransition.TransitionCode.SPLIT, + region.getRegionInfo(), regions.getFirst().getRegionInfo(), + regions.getSecond().getRegionInfo()); } // first daughter second @@ -157,8 +238,8 @@ public class TestEndToEndSplitTransaction { if (split.useZKForAssignment) { // 4. 
phase III ((BaseCoordinatedStateManager) server.getCoordinatedStateManager()) - .getSplitTransactionCoordination().completeSplitTransaction(server, regions.getFirst(), - regions.getSecond(), split.std, region); + .getSplitTransactionCoordination().completeSplitTransaction(server, regions.getFirst(), + regions.getSecond(), split.std, region); } assertTrue(test(conn, tableName, firstRow, server)); @@ -552,6 +633,28 @@ public class TestEndToEndSplitTransaction { rem = timeout - (System.currentTimeMillis() - start); blockUntilRegionIsOpened(conf, rem, daughterB); + + // Compacting the new region to make sure references can be cleaned up + compactAndBlockUntilDone(TEST_UTIL.getHBaseAdmin(), + TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterA.getRegionName()); + compactAndBlockUntilDone(TEST_UTIL.getHBaseAdmin(), + TEST_UTIL.getMiniHBaseCluster().getRegionServer(0), daughterB.getRegionName()); + + removeCompactedfiles(conn, timeout, daughterA); + removeCompactedfiles(conn, timeout, daughterB); + } + } + } + + public static void removeCompactedfiles(Connection conn, long timeout, HRegionInfo hri) + throws IOException, InterruptedException { + log("removeCompactedfiles for : " + hri.getRegionNameAsString()); + List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(hri.getTable()); + for (HRegion region : regions) { + try { + region.getStores().get(0).closeAndArchiveCompactedFiles(); + } catch (IOException e) { + e.printStackTrace(); } } } http://git-wip-us.apache.org/repos/asf/hbase/blob/56c59f11/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java index f7d57b5..a10f48d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java +++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java @@ -2686,6 +2686,10 @@ public class TestHRegion { LOG.info("" + HBaseTestCase.addContent(region, fam3)); region.flush(true); region.compactStores(); + region.waitForFlushesAndCompactions(); + for(Store s:region.getStores()) { + s.closeAndArchiveCompactedFiles(); + } byte[] splitRow = region.checkSplit(); assertNotNull(splitRow); LOG.info("SplitRow: " + Bytes.toString(splitRow)); @@ -2695,6 +2699,10 @@ public class TestHRegion { for (int i = 0; i < subregions.length; i++) { HRegion.openHRegion(subregions[i], null); subregions[i].compactStores(); + subregions[i].waitForFlushesAndCompactions(); + for(Store s:subregions[i].getStores()) { + s.closeAndArchiveCompactedFiles(); + } } Path oldRegionPath = region.getRegionFileSystem().getRegionDir(); Path oldRegion1 = subregions[0].getRegionFileSystem().getRegionDir(); http://git-wip-us.apache.org/repos/asf/hbase/blob/56c59f11/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java index 4c11c61..32700af 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java @@ -27,12 +27,12 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.IOException; -import java.io.InterruptedIOException; import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; import org.apache.commons.logging.Log; import 
org.apache.commons.logging.LogFactory; @@ -88,6 +88,7 @@ import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.master.RegionStates; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState; import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController; import org.apache.hadoop.hbase.testclassification.LargeTests; @@ -97,6 +98,7 @@ import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.HBaseFsck; import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; import org.apache.hadoop.hbase.util.PairOfSameType; +import org.apache.hadoop.hbase.util.RetryCounter; import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.zookeeper.ZKAssign; import org.apache.hadoop.hbase.zookeeper.ZKUtil; @@ -605,11 +607,20 @@ public class TestSplitTransactionOnCluster { // Compact first to ensure we have cleaned up references -- else the split // will fail. this.admin.compact(daughter.getRegionName()); + RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS); + while (CompactionState.NONE != admin.getCompactionStateForRegion(daughter.getRegionName()) + && retrier.shouldRetry()) { + retrier.sleepUntilNextRetry(); + } + daughters = cluster.getRegions(tableName); HRegion daughterRegion = null; - for (HRegion r: daughters) { + for (HRegion r : daughters) { if (r.getRegionInfo().equals(daughter)) { daughterRegion = r; + // Archiving the compacted references file + r.getStores().get(0).closeAndArchiveCompactedFiles(); + LOG.info("Found matching HRI: " + daughterRegion); break; }
