This is an automated email from the ASF dual-hosted git repository. wchevreuil pushed a commit to branch branch-2.6 in repository https://gitbox.apache.org/repos/asf/hbase.git
commit 3e114641e54d10c78ca5d3d7179c1bca4e73de2c Author: vinayak hegde <vinayakph...@gmail.com> AuthorDate: Mon Apr 22 15:23:30 2024 +0530 HBASE-28466 Integration of time-based priority logic of bucket cache in prefetch functionality of HBase (#5808) Signed-off-by: Wellington Chevreuil <wchevre...@apache.org> Change-Id: I374a9ab88807da3188962d7ac0b2b727eaffd010 --- .../apache/hadoop/hbase/io/hfile/BlockCache.java | 5 +- .../hadoop/hbase/io/hfile/CombinedBlockCache.java | 6 +- .../apache/hadoop/hbase/io/hfile/HFileInfo.java | 6 ++ .../hadoop/hbase/io/hfile/HFilePreadReader.java | 2 + .../hadoop/hbase/io/hfile/bucket/BucketCache.java | 15 +++- .../hbase/regionserver/DataTieringManager.java | 91 +++++++++++++++++----- .../hbase/regionserver/TestDataTieringManager.java | 57 ++++++++++++-- 7 files changed, 150 insertions(+), 32 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java index 8f12e367e58..8380fc194e7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java @@ -184,11 +184,12 @@ public interface BlockCache extends Iterable<CachedBlock>, ConfigurationObserver * overridden by all implementing classes. In such cases, the returned Optional will be empty. For * subclasses implementing this logic, the returned Optional would contain the boolean value * reflecting if the passed file should indeed be cached. - * @param fileName to check if it should be cached. + * @param hFileInfo Information about the file to check if it should be cached. + * @param conf The configuration object to use for determining caching behavior. * @return empty optional if this method is not supported, otherwise the returned optional * contains the boolean value informing if the file should be cached. */ - default Optional<Boolean> shouldCacheFile(String fileName) { + default Optional<Boolean> shouldCacheFile(HFileInfo hFileInfo, Configuration conf) { return Optional.empty(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java index 61976a86f83..fe675aade7b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java @@ -477,9 +477,9 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize { } @Override - public Optional<Boolean> shouldCacheFile(String fileName) { - Optional<Boolean> l1Result = l1Cache.shouldCacheFile(fileName); - Optional<Boolean> l2Result = l2Cache.shouldCacheFile(fileName); + public Optional<Boolean> shouldCacheFile(HFileInfo hFileInfo, Configuration conf) { + Optional<Boolean> l1Result = l1Cache.shouldCacheFile(hFileInfo, conf); + Optional<Boolean> l2Result = l2Cache.shouldCacheFile(hFileInfo, conf); final Mutable<Boolean> combinedResult = new MutableBoolean(true); l1Result.ifPresent(b -> combinedResult.setValue(b && combinedResult.getValue())); l2Result.ifPresent(b -> combinedResult.setValue(b && combinedResult.getValue())); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileInfo.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileInfo.java index e1623537385..b3ccd248780 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileInfo.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileInfo.java @@ -121,6 +121,7 @@ public class HFileInfo implements SortedMap<byte[], byte[]> { private FixedFileTrailer trailer; private HFileContext hfileContext; + private boolean initialized = false; public HFileInfo() { super(); @@ -361,6 +362,10 @@ public class HFileInfo implements SortedMap<byte[], byte[]> { * should be called after initTrailerAndContext */ public void initMetaAndIndex(HFile.Reader reader) throws IOException { + if (initialized) { + return; + } + ReaderContext context = reader.getContext(); try { HFileBlock.FSReader blockReader = reader.getUncachedBlockReader(); @@ -398,6 +403,7 @@ public class HFileInfo implements SortedMap<byte[], byte[]> { throw new CorruptHFileException( "Problem reading data index and meta index from file " + context.getFilePath(), t); } + initialized = true; } private HFileContext createHFileContext(Path path, FixedFileTrailer trailer, Configuration conf) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePreadReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePreadReader.java index 08649ebb315..3ef5f50db02 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePreadReader.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePreadReader.java @@ -35,6 +35,8 @@ public class HFilePreadReader extends HFileReaderImpl { public HFilePreadReader(ReaderContext context, HFileInfo fileInfo, CacheConfig cacheConf, Configuration conf) throws IOException { super(context, fileInfo, cacheConf, conf); + // Initialize HFileInfo object with metadata for caching decisions + fileInfo.initMetaAndIndex(this); // master hosted regions, like the master procedures store wouldn't have a block cache // Prefetch file blocks upon open if requested if (cacheConf.getBlockCache().isPresent() && cacheConf.shouldPrefetchOnOpen()) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java index 28b2b05936c..569a04bda8f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java @@ -79,9 +79,11 @@ import org.apache.hadoop.hbase.io.hfile.CachedBlock; import org.apache.hadoop.hbase.io.hfile.CombinedBlockCache; import org.apache.hadoop.hbase.io.hfile.HFileBlock; import org.apache.hadoop.hbase.io.hfile.HFileContext; +import org.apache.hadoop.hbase.io.hfile.HFileInfo; import org.apache.hadoop.hbase.nio.ByteBuff; import org.apache.hadoop.hbase.nio.RefCnt; import org.apache.hadoop.hbase.protobuf.ProtobufMagic; +import org.apache.hadoop.hbase.regionserver.DataTieringManager; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.StoreFileInfo; import org.apache.hadoop.hbase.util.Bytes; @@ -2317,7 +2319,18 @@ public class BucketCache implements BlockCache, HeapSize { } @Override - public Optional<Boolean> shouldCacheFile(String fileName) { + public Optional<Boolean> shouldCacheFile(HFileInfo hFileInfo, Configuration conf) { + String fileName = hFileInfo.getHFileContext().getHFileName(); + try { + DataTieringManager dataTieringManager = DataTieringManager.getInstance(); + if (!dataTieringManager.isHotData(hFileInfo, conf)) { + LOG.debug("Data tiering is enabled for file: '{}' and it is not hot data", fileName); + return Optional.of(false); + } + } catch (IllegalStateException e) { + LOG.error("Error while getting DataTieringManager instance: {}", e.getMessage()); + } + // if we don't have the file in fullyCachedFiles, we should cache it return Optional.of(!fullyCachedFiles.containsKey(fileName)); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java index 2903963f706..b781052efa9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java @@ -17,6 +17,9 @@ */ package org.apache.hadoop.hbase.regionserver; +import static org.apache.hadoop.hbase.regionserver.HStoreFile.TIMERANGE_KEY; + +import java.io.IOException; import java.util.HashSet; import java.util.Map; import java.util.OptionalLong; @@ -24,6 +27,7 @@ import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.io.hfile.BlockCacheKey; +import org.apache.hadoop.hbase.io.hfile.HFileInfo; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.yetus.audience.InterfaceAudience; @@ -106,11 +110,13 @@ public class DataTieringManager { } /** - * Determines whether the data associated with the given block cache key is considered hot. + * Determines whether the data associated with the given block cache key is considered hot. If the + * data tiering type is set to {@link DataTieringType#TIME_RANGE} and maximum timestamp is not + * present, it considers {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by + * default. * @param key the block cache key * @return {@code true} if the data is hot, {@code false} otherwise - * @throws DataTieringException if there is an error retrieving data tiering information or the - * HFile maximum timestamp + * @throws DataTieringException if there is an error retrieving data tiering information */ public boolean isHotData(BlockCacheKey key) throws DataTieringException { Path hFilePath = key.getFilePath(); @@ -122,37 +128,82 @@ public class DataTieringManager { /** * Determines whether the data in the HFile at the given path is considered hot based on the - * configured data tiering type and hot data age. + * configured data tiering type and hot data age. If the data tiering type is set to + * {@link DataTieringType#TIME_RANGE} and maximum timestamp is not present, it considers + * {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by default. * @param hFilePath the path to the HFile * @return {@code true} if the data is hot, {@code false} otherwise - * @throws DataTieringException if there is an error retrieving data tiering information or the - * HFile maximum timestamp + * @throws DataTieringException if there is an error retrieving data tiering information */ public boolean isHotData(Path hFilePath) throws DataTieringException { Configuration configuration = getConfiguration(hFilePath); DataTieringType dataTieringType = getDataTieringType(configuration); if (dataTieringType.equals(DataTieringType.TIME_RANGE)) { - long hotDataAge = getDataTieringHotDataAge(configuration); - - HStoreFile hStoreFile = getHStoreFile(hFilePath); - if (hStoreFile == null) { - LOG.error("HStoreFile corresponding to " + hFilePath + " doesn't exist"); - return false; - } - OptionalLong maxTimestamp = hStoreFile.getMaximumTimestamp(); - if (!maxTimestamp.isPresent()) { - throw new DataTieringException("Maximum timestamp not present for " + hFilePath); - } + return hotDataValidator(getMaxTimestamp(hFilePath), getDataTieringHotDataAge(configuration)); + } + // DataTieringType.NONE or other types are considered hot by default + return true; + } - long currentTimestamp = EnvironmentEdgeManager.getDelegate().currentTime(); - long diff = currentTimestamp - maxTimestamp.getAsLong(); - return diff <= hotDataAge; + /** + * Determines whether the data in the HFile being read is considered hot based on the configured + * data tiering type and hot data age. If the data tiering type is set to + * {@link DataTieringType#TIME_RANGE} and maximum timestamp is not present, it considers + * {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by default. + * @param hFileInfo Information about the HFile to determine if its data is hot. + * @param configuration The configuration object to use for determining hot data criteria. + * @return {@code true} if the data is hot, {@code false} otherwise + */ + public boolean isHotData(HFileInfo hFileInfo, Configuration configuration) { + DataTieringType dataTieringType = getDataTieringType(configuration); + if (dataTieringType.equals(DataTieringType.TIME_RANGE)) { + return hotDataValidator(getMaxTimestamp(hFileInfo), getDataTieringHotDataAge(configuration)); } // DataTieringType.NONE or other types are considered hot by default return true; } + private boolean hotDataValidator(long maxTimestamp, long hotDataAge) { + long currentTimestamp = getCurrentTimestamp(); + long diff = currentTimestamp - maxTimestamp; + return diff <= hotDataAge; + } + + private long getMaxTimestamp(Path hFilePath) throws DataTieringException { + HStoreFile hStoreFile = getHStoreFile(hFilePath); + if (hStoreFile == null) { + LOG.error("HStoreFile corresponding to " + hFilePath + " doesn't exist"); + return Long.MAX_VALUE; + } + OptionalLong maxTimestamp = hStoreFile.getMaximumTimestamp(); + if (!maxTimestamp.isPresent()) { + LOG.error("Maximum timestamp not present for " + hFilePath); + return Long.MAX_VALUE; + } + return maxTimestamp.getAsLong(); + } + + private long getMaxTimestamp(HFileInfo hFileInfo) { + try { + byte[] hFileTimeRange = hFileInfo.get(TIMERANGE_KEY); + if (hFileTimeRange == null) { + LOG.error("Timestamp information not found for file: {}", + hFileInfo.getHFileContext().getHFileName()); + return Long.MAX_VALUE; + } + return TimeRangeTracker.parseFrom(hFileTimeRange).getMax(); + } catch (IOException e) { + LOG.error("Error occurred while reading the timestamp metadata of file: {}", + hFileInfo.getHFileContext().getHFileName(), e); + return Long.MAX_VALUE; + } + } + + private long getCurrentTimestamp() { + return EnvironmentEdgeManager.getDelegate().currentTime(); + } + /** * Returns a set of cold data filenames from the given set of cached blocks. Cold data is * determined by the configured data tiering type and hot data age. diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java index 54853944583..3e99d453d5b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java @@ -17,7 +17,9 @@ */ package org.apache.hadoop.hbase.regionserver; +import static org.apache.hadoop.hbase.HConstants.BUCKET_CACHE_SIZE_KEY; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.IOException; import java.util.ArrayList; @@ -26,14 +28,17 @@ import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Random; +import java.util.Optional; import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.Waiter; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; import org.apache.hadoop.hbase.client.RegionInfo; @@ -53,6 +58,7 @@ import org.apache.hadoop.hbase.testclassification.RegionServerTests; import org.apache.hadoop.hbase.testclassification.SmallTests; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.CommonFSUtils; +import org.apache.hadoop.hbase.util.Pair; import org.junit.BeforeClass; import org.junit.ClassRule; import org.junit.Test; @@ -87,9 +93,11 @@ public class TestDataTieringManager { private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); private static Configuration defaultConf; private static FileSystem fs; + private static BlockCache blockCache; private static CacheConfig cacheConf; private static Path testDir; private static final Map<String, HRegion> testOnlineRegions = new HashMap<>(); + private static DataTieringManager dataTieringManager; private static final List<HStoreFile> hStoreFiles = new ArrayList<>(); @@ -104,11 +112,14 @@ public class TestDataTieringManager { public static void setupBeforeClass() throws Exception { testDir = TEST_UTIL.getDataTestDir(TestDataTieringManager.class.getSimpleName()); defaultConf = TEST_UTIL.getConfiguration(); + defaultConf.setBoolean(CacheConfig.PREFETCH_BLOCKS_ON_OPEN_KEY, true); + defaultConf.setStrings(HConstants.BUCKET_CACHE_IOENGINE_KEY, "offheap"); + defaultConf.setLong(BUCKET_CACHE_SIZE_KEY, 32); fs = HFileSystem.get(defaultConf); - BlockCache blockCache = BlockCacheFactory.createBlockCache(defaultConf); + blockCache = BlockCacheFactory.createBlockCache(defaultConf); cacheConf = new CacheConfig(defaultConf, blockCache); - setupOnlineRegions(); DataTieringManager.instantiate(testOnlineRegions); + setupOnlineRegions(); dataTieringManager = DataTieringManager.getInstance(); } @@ -197,7 +208,30 @@ public class TestDataTieringManager { // Test with a filename where corresponding HStoreFile in not present hFilePath = new Path(hStoreFiles.get(0).getPath().getParent(), "incorrectFileName"); - testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, false); + testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, true); + } + + @Test + public void testPrefetchWhenDataTieringEnabled() throws IOException { + setPrefetchBlocksOnOpen(); + initializeTestEnvironment(); + // Evict blocks from cache by closing the files and passing evict on close. + // Then initialize the reader again. Since Prefetch on open is set to true, it should prefetch + // those blocks. + for (HStoreFile file : hStoreFiles) { + file.closeStoreFile(true); + file.initReader(); + } + + // Since we have one cold file among four files, only three should get prefetched. + Optional<Map<String, Pair<String, Long>>> fullyCachedFiles = blockCache.getFullyCachedFiles(); + assertTrue("We should get the fully cached files from the cache", fullyCachedFiles.isPresent()); + Waiter.waitFor(defaultConf, 10000, () -> fullyCachedFiles.get().size() == 3); + assertEquals("Number of fully cached files are incorrect", 3, fullyCachedFiles.get().size()); + } + + private void setPrefetchBlocksOnOpen() { + defaultConf.setBoolean(CacheConfig.PREFETCH_BLOCKS_ON_OPEN_KEY, true); } @Test @@ -278,6 +312,17 @@ public class TestDataTieringManager { testDataTieringMethodWithKey(caller, key, expectedResult, null); } + private static void initializeTestEnvironment() throws IOException { + setupFileSystemAndCache(); + setupOnlineRegions(); + } + + private static void setupFileSystemAndCache() throws IOException { + fs = HFileSystem.get(defaultConf); + blockCache = BlockCacheFactory.createBlockCache(defaultConf); + cacheConf = new CacheConfig(defaultConf, blockCache); + } + private static void setupOnlineRegions() throws IOException { testOnlineRegions.clear(); hStoreFiles.clear(); @@ -376,11 +421,11 @@ public class TestDataTieringManager { writeStoreFileRandomData(storeFileWriter, Bytes.toBytes(columnFamily), timestamp); - StoreContext storeContext = StoreContext.getBuilder().withRegionFileSystem(regionFs).build(); + StoreContext storeContext = + StoreContext.getBuilder().withRegionFileSystem(regionFs).build(); StoreFileTracker sft = StoreFileTrackerFactory.create(conf, true, storeContext); - return new HStoreFile(fs, storeFileWriter.getPath(), conf, cacheConf, BloomType.NONE, true, - sft); + return new HStoreFile(fs, storeFileWriter.getPath(), conf, cacheConf, BloomType.NONE, true, sft); } private static void writeStoreFileRandomData(final StoreFileWriter writer, byte[] columnFamily,