This is an automated email from the ASF dual-hosted git repository.

wchevreuil pushed a commit to branch branch-2.6
in repository https://gitbox.apache.org/repos/asf/hbase.git
commit 063e4cc5682c78754d472e6c6dbf85020a6a0749
Author: vinayak hegde <vinayakph...@gmail.com>
AuthorDate: Fri Apr 12 14:54:37 2024 +0530

    HBASE-28505 Implement enforcement to require Date Tiered Compaction for Time Range Data Tiering (#5809)

    Signed-off-by: Wellington Chevreuil <wchevre...@apache.org>
    Change-Id: I30772e5e4ea0e91f862327616a108bd1033fee89
---
 .../hbase/regionserver/DataTieringManager.java     |  2 +-
 .../hbase/regionserver/DateTieredStoreEngine.java  |  3 +
 .../hadoop/hbase/util/TableDescriptorChecker.java  | 36 +++++++++
 .../hbase/client/TestIllegalTableDescriptor.java   | 39 ++++++++++
 .../hbase/regionserver/TestDataTieringManager.java | 89 +++++++++++++++-------
 5 files changed, 142 insertions(+), 27 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
index 0bc04ddc428..2903963f706 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
@@ -199,7 +199,7 @@ public class DataTieringManager {
   private HStoreFile getHStoreFile(Path hFilePath) throws DataTieringException {
     HStore hStore = getHStore(hFilePath);
     for (HStoreFile file : hStore.getStorefiles()) {
-      if (file.getPath().equals(hFilePath)) {
+      if (file.getPath().toUri().getPath().toString().equals(hFilePath.toString())) {
         return file;
       }
     }
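The DataTieringManager fix above addresses a Path comparison pitfall: org.apache.hadoop.fs.Path.equals compares the entire URI, including scheme and authority, so a fully qualified store file path (as HStoreFile.getPath() may return on HDFS) never equals an unqualified Path naming the same file. Comparing only the URI's path component, as the patched getHStoreFile does, makes the lookup scheme-insensitive. A minimal standalone sketch of the difference; the HDFS paths below are illustrative only:

import org.apache.hadoop.fs.Path;

public class PathComparisonSketch {
  public static void main(String[] args) {
    // Fully qualified path, e.g. what a store file may report on HDFS.
    Path qualified = new Path("hdfs://nameservice/hbase/data/default/t1/r1/cf/f1");
    // Unqualified path naming the same file, e.g. from a block cache key.
    Path unqualified = new Path("/hbase/data/default/t1/r1/cf/f1");

    // Path.equals compares the whole URI, so this prints false.
    System.out.println(qualified.equals(unqualified));

    // Comparing just the URI path component prints true, which is the
    // behaviour the patched getHStoreFile relies on.
    System.out.println(qualified.toUri().getPath().equals(unqualified.toUri().getPath()));
  }
}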
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DateTieredStoreEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DateTieredStoreEngine.java
index ded6564bce5..26437ab1124 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DateTieredStoreEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DateTieredStoreEngine.java
@@ -41,6 +41,9 @@ import org.apache.yetus.audience.InterfaceAudience;
 @InterfaceAudience.Private
 public class DateTieredStoreEngine extends StoreEngine<DefaultStoreFlusher,
   DateTieredCompactionPolicy, DateTieredCompactor, DefaultStoreFileManager> {
+
+  public static final String DATE_TIERED_STORE_ENGINE = DateTieredStoreEngine.class.getName();
+
   @Override
   public boolean needsCompaction(List<HStoreFile> filesCompacting) {
     return compactionPolicy.needsCompaction(storeFileManager.getStoreFiles(), filesCompacting);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/TableDescriptorChecker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/TableDescriptorChecker.java
index a826860aae4..409cc284dee 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/TableDescriptorChecker.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/TableDescriptorChecker.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hbase.util;
 
+import static org.apache.hadoop.hbase.regionserver.DateTieredStoreEngine.DATE_TIERED_STORE_ENGINE;
+
 import java.io.IOException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.CompoundConfiguration;
@@ -29,10 +31,13 @@ import org.apache.hadoop.hbase.client.TableDescriptor;
 import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
 import org.apache.hadoop.hbase.conf.ConfigKey;
 import org.apache.hadoop.hbase.fs.ErasureCodingUtils;
+import org.apache.hadoop.hbase.regionserver.DataTieringManager;
+import org.apache.hadoop.hbase.regionserver.DataTieringType;
 import org.apache.hadoop.hbase.regionserver.DefaultStoreEngine;
 import org.apache.hadoop.hbase.regionserver.HStore;
 import org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost;
 import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
+import org.apache.hadoop.hbase.regionserver.StoreEngine;
 import org.apache.hadoop.hbase.regionserver.compactions.ExploringCompactionPolicy;
 import org.apache.hadoop.hbase.regionserver.compactions.FIFOCompactionPolicy;
 import org.apache.yetus.audience.InterfaceAudience;
@@ -201,6 +206,8 @@ public final class TableDescriptorChecker {
 
       // check in-memory compaction
       warnOrThrowExceptionForFailure(logWarn, hcd::getInMemoryCompaction);
+
+      checkDateTieredCompactionForTimeRangeDataTiering(conf, td);
     }
   }
 
@@ -220,6 +227,35 @@ public final class TableDescriptorChecker {
     });
   }
 
+  private static void checkDateTieredCompactionForTimeRangeDataTiering(final Configuration conf,
+    final TableDescriptor td) throws IOException {
+    // Table level configurations
+    checkDateTieredCompactionForTimeRangeDataTiering(conf);
+    for (ColumnFamilyDescriptor cfd : td.getColumnFamilies()) {
+      // Column family level configurations
+      Configuration cfdConf =
+        new CompoundConfiguration().add(conf).addStringMap(cfd.getConfiguration());
+      checkDateTieredCompactionForTimeRangeDataTiering(cfdConf);
+    }
+  }
+
+  private static void checkDateTieredCompactionForTimeRangeDataTiering(final Configuration conf)
+    throws IOException {
+    final String errorMessage =
+      "Time Range Data Tiering should be enabled with Date Tiered Compaction.";
+
+    warnOrThrowExceptionForFailure(false, () -> {
+
+      // Determine whether Date Tiered Compaction will be enabled when Time Range Data Tiering is
+      // enabled after the configuration change.
+      if (DataTieringType.TIME_RANGE.name().equals(conf.get(DataTieringManager.DATATIERING_KEY))) {
+        if (!DATE_TIERED_STORE_ENGINE.equals(conf.get(StoreEngine.STORE_ENGINE_CLASS_KEY))) {
+          throw new IllegalArgumentException(errorMessage);
+        }
+      }
+    });
+  }
+
   private static void checkCompactionPolicy(final Configuration conf, final TableDescriptor td)
     throws IOException {
     warnOrThrowExceptionForFailure(false, () -> {
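The new check runs once against the table-level configuration and once per column family, where the family-level keys are layered over the table configuration through a CompoundConfiguration, so an override of either key at the family level is validated the same way. A minimal sketch of a descriptor that passes the check, using the constants referenced in the diff; the table and family names are illustrative:

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.regionserver.DataTieringManager;
import org.apache.hadoop.hbase.regionserver.DataTieringType;
import org.apache.hadoop.hbase.regionserver.StoreEngine;

public class TimeRangeTieringDescriptorSketch {
  public static TableDescriptor build() {
    return TableDescriptorBuilder.newBuilder(TableName.valueOf("events"))
      // Enable time range data tiering at the table level...
      .setValue(DataTieringManager.DATATIERING_KEY, DataTieringType.TIME_RANGE.name())
      // ...and pair it with the date tiered store engine, as the check requires.
      .setValue(StoreEngine.STORE_ENGINE_CLASS_KEY,
        "org.apache.hadoop.hbase.regionserver.DateTieredStoreEngine")
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("cf"))
      .build();
  }
}

Dropping the STORE_ENGINE_CLASS_KEY setting, or pointing it at DefaultStoreEngine, would trigger the IllegalArgumentException with the message quoted in the check.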
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestIllegalTableDescriptor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestIllegalTableDescriptor.java
index c566432b4e7..2d45c05324b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestIllegalTableDescriptor.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestIllegalTableDescriptor.java
@@ -33,6 +33,9 @@ import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.regionserver.DataTieringManager;
+import org.apache.hadoop.hbase.regionserver.DataTieringType;
+import org.apache.hadoop.hbase.regionserver.StoreEngine;
 import org.apache.hadoop.hbase.testclassification.ClientTests;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -189,6 +192,42 @@ public class TestIllegalTableDescriptor {
       + "cause very frequent flushing."));
   }
 
+  @Test
+  public void testIllegalTableDescriptorWithDataTiering() throws IOException {
+    // table level configuration changes
+    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(name.getMethodName()));
+    HColumnDescriptor hcd = new HColumnDescriptor(FAMILY);
+
+    // First scenario: DataTieringType set to TIME_RANGE without DateTieredStoreEngine
+    htd.setValue(DataTieringManager.DATATIERING_KEY, DataTieringType.TIME_RANGE.name());
+    checkTableIsIllegal(htd);
+
+    // Second scenario: DataTieringType set to TIME_RANGE with DateTieredStoreEngine
+    htd.setValue(StoreEngine.STORE_ENGINE_CLASS_KEY,
+      "org.apache.hadoop.hbase.regionserver.DateTieredStoreEngine");
+    checkTableIsLegal(htd);
+
+    // Third scenario: Disabling DateTieredStoreEngine while Time Range DataTiering is active
+    htd.setValue(StoreEngine.STORE_ENGINE_CLASS_KEY,
+      "org.apache.hadoop.hbase.regionserver.DefaultStoreEngine");
+    checkTableIsIllegal(htd);
+
+    // First scenario: DataTieringType set to TIME_RANGE without DateTieredStoreEngine
+    hcd.setConfiguration(DataTieringManager.DATATIERING_KEY,
+      DataTieringType.TIME_RANGE.name());
+    checkTableIsIllegal(htd.addFamily(hcd));
+
+    // Second scenario: DataTieringType set to TIME_RANGE with DateTieredStoreEngine
+    hcd.setConfiguration(StoreEngine.STORE_ENGINE_CLASS_KEY,
+      "org.apache.hadoop.hbase.regionserver.DateTieredStoreEngine");
+    checkTableIsLegal(htd.addFamily(hcd));
+
+    // Third scenario: Disabling DateTieredStoreEngine while Time Range DataTiering is active
+    hcd.setConfiguration(StoreEngine.STORE_ENGINE_CLASS_KEY,
+      "org.apache.hadoop.hbase.regionserver.DefaultStoreEngine");
+    checkTableIsIllegal(htd.addFamily(hcd));
+  }
+
   private void checkTableIsLegal(HTableDescriptor htd) throws IOException {
     Admin admin = TEST_UTIL.getAdmin();
     admin.createTable(htd);
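For context on how the rejection reaches a client: checkTableIsIllegal in this test simply expects Admin.createTable or Admin.modifyTable to throw, because the master runs TableDescriptorChecker as part of sanity checking. A hedged sketch of the client-side view, assuming hbase.table.sanity.checks is left at its default of true; the helper below is hypothetical, not part of the patch:

import java.io.IOException;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.TableDescriptor;

public class CreateTableRejectionSketch {
  /** Attempts the create and reports whether the master rejected the descriptor. */
  public static boolean isRejected(Admin admin, TableDescriptor td) {
    try {
      admin.createTable(td);
      return false; // accepted: the descriptor passed sanity checks
    } catch (IOException e) {
      // Sanity check failures surface here before any region is created; with
      // the new check the message should include "Time Range Data Tiering
      // should be enabled with Date Tiered Compaction."
      return true;
    }
  }
}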
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java
index afb5862a8a4..54853944583 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java
@@ -19,19 +19,19 @@ package org.apache.hadoop.hbase.regionserver;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Random;
 import java.util.Set;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
-import org.apache.hadoop.hbase.HBaseTestingUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
@@ -47,6 +47,8 @@ import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
 import org.apache.hadoop.hbase.io.hfile.BlockType;
 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
 import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
+import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
+import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
 import org.apache.hadoop.hbase.testclassification.RegionServerTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -82,15 +84,21 @@ public class TestDataTieringManager {
   public static final HBaseClassTestRule CLASS_RULE =
     HBaseClassTestRule.forClass(TestDataTieringManager.class);
 
-  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
   private static Configuration defaultConf;
   private static FileSystem fs;
   private static CacheConfig cacheConf;
   private static Path testDir;
-  private static Map<String, HRegion> testOnlineRegions;
-
+  private static final Map<String, HRegion> testOnlineRegions = new HashMap<>();
   private static DataTieringManager dataTieringManager;
-  private static List<HStoreFile> hStoreFiles;
+  private static final List<HStoreFile> hStoreFiles = new ArrayList<>();
+
+  /**
+   * Represents the current lexicographically increasing string used as a row key when writing
+   * HFiles. It is incremented each time {@link #nextString()} is called to generate unique row
+   * keys.
+   */
+  private static String rowKeyString;
 
   @BeforeClass
   public static void setupBeforeClass() throws Exception {
@@ -271,21 +279,20 @@ public class TestDataTieringManager {
   }
 
   private static void setupOnlineRegions() throws IOException {
-    testOnlineRegions = new HashMap<>();
-    hStoreFiles = new ArrayList<>();
-
+    testOnlineRegions.clear();
+    hStoreFiles.clear();
     long day = 24 * 60 * 60 * 1000;
     long currentTime = System.currentTimeMillis();
 
     HRegion region1 = createHRegion("table1");
 
     HStore hStore11 = createHStore(region1, "cf1", getConfWithTimeRangeDataTieringEnabled(day));
-    hStoreFiles
-      .add(createHStoreFile(hStore11.getStoreContext().getFamilyStoreDirectoryPath(), currentTime));
+    hStoreFiles.add(createHStoreFile(hStore11.getStoreContext().getFamilyStoreDirectoryPath(),
+      hStore11.getReadOnlyConfiguration(), currentTime, region1.getRegionFileSystem()));
     hStore11.refreshStoreFiles();
     HStore hStore12 = createHStore(region1, "cf2");
     hStoreFiles.add(createHStoreFile(hStore12.getStoreContext().getFamilyStoreDirectoryPath(),
-      currentTime - day));
+      hStore12.getReadOnlyConfiguration(), currentTime - day, region1.getRegionFileSystem()));
     hStore12.refreshStoreFiles();
 
     region1.stores.put(Bytes.toBytes("cf1"), hStore11);
@@ -296,11 +303,11 @@ public class TestDataTieringManager {
 
     HStore hStore21 = createHStore(region2, "cf1");
     hStoreFiles.add(createHStoreFile(hStore21.getStoreContext().getFamilyStoreDirectoryPath(),
-      currentTime - 2 * day));
+      hStore21.getReadOnlyConfiguration(), currentTime - 2 * day, region2.getRegionFileSystem()));
     hStore21.refreshStoreFiles();
     HStore hStore22 = createHStore(region2, "cf2");
     hStoreFiles.add(createHStoreFile(hStore22.getStoreContext().getFamilyStoreDirectoryPath(),
-      currentTime - 3 * day));
+      hStore22.getReadOnlyConfiguration(), currentTime - 3 * day, region2.getRegionFileSystem()));
     hStore22.refreshStoreFiles();
 
     region2.stores.put(Bytes.toBytes("cf1"), hStore21);
@@ -359,31 +366,61 @@ public class TestDataTieringManager {
     return conf;
   }
 
-  private static HStoreFile createHStoreFile(Path storeDir, long timestamp) throws IOException {
+
+  static HStoreFile createHStoreFile(Path storeDir, Configuration conf, long timestamp,
+    HRegionFileSystem regionFs) throws IOException {
     String columnFamily = storeDir.getName();
 
-    StoreFileWriter storeFileWriter = new StoreFileWriter.Builder(defaultConf, cacheConf, fs)
+    StoreFileWriter storeFileWriter = new StoreFileWriter.Builder(conf, cacheConf, fs)
       .withOutputDir(storeDir).withFileContext(new HFileContextBuilder().build()).build();
 
-    writeStoreFileRandomData(storeFileWriter, Bytes.toBytes(columnFamily), Bytes.toBytes("random"),
-      timestamp);
+    writeStoreFileRandomData(storeFileWriter, Bytes.toBytes(columnFamily), timestamp);
+
+    StoreContext storeContext = StoreContext.getBuilder().withRegionFileSystem(regionFs).build();
 
-    return new HStoreFile(fs, storeFileWriter.getPath(), defaultConf, cacheConf, BloomType.NONE,
-      true);
+    StoreFileTracker sft = StoreFileTrackerFactory.create(conf, true, storeContext);
+    return new HStoreFile(fs, storeFileWriter.getPath(), conf, cacheConf, BloomType.NONE, true,
+      sft);
   }
 
   private static void writeStoreFileRandomData(final StoreFileWriter writer, byte[] columnFamily,
-    byte[] qualifier, long timestamp) throws IOException {
+    long timestamp) throws IOException {
+    int cellsPerFile = 10;
+    byte[] qualifier = Bytes.toBytes("qualifier");
+    byte[] value = generateRandomBytes(4 * 1024);
     try {
-      for (char d = 'a'; d <= 'z'; d++) {
-        for (char e = 'a'; e <= 'z'; e++) {
-          byte[] b = new byte[] { (byte) d, (byte) e };
-          writer.append(new KeyValue(b, columnFamily, qualifier, timestamp, b));
-        }
+      for (int i = 0; i < cellsPerFile; i++) {
+        byte[] row = Bytes.toBytes(nextString());
+        writer.append(new KeyValue(row, columnFamily, qualifier, timestamp, value));
       }
     } finally {
       writer.appendTrackedTimestampsToMetadata();
       writer.close();
     }
   }
+
+  private static byte[] generateRandomBytes(int sizeInBytes) {
+    Random random = new Random();
+    byte[] randomBytes = new byte[sizeInBytes];
+    random.nextBytes(randomBytes);
+    return randomBytes;
+  }
+
+  /**
+   * Returns the lexicographically larger string every time it's called.
+   */
+  private static String nextString() {
+    if (rowKeyString == null || rowKeyString.isEmpty()) {
+      rowKeyString = "a";
+    }
+    char lastChar = rowKeyString.charAt(rowKeyString.length() - 1);
+    if (lastChar < 'z') {
+      rowKeyString = rowKeyString.substring(0, rowKeyString.length() - 1) + (char) (lastChar + 1);
+    } else {
+      rowKeyString = rowKeyString + "a";
+    }
+    return rowKeyString;
+  }
 }
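A closing note on the test refactor: writeStoreFileRandomData now writes ten 4 KB cells per file, and nextString() produces row keys that strictly increase in byte order, which matters because StoreFileWriter requires cells to be appended in sorted key order. Tracing the helper shows the seed "a" is advanced before it is first returned, so the generated sequence starts at "b":

// Illustration only, not part of the patch; assumes access to nextString().
for (int i = 0; i < 30; i++) {
  System.out.print(nextString() + " ");
}
// prints: b c d e f g h i j k l m n o p q r s t u v w x y z za zb zc zd ze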