This is an automated email from the ASF dual-hosted git repository.

wchevreuil pushed a commit to branch branch-2.6
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 3e114641e54d10c78ca5d3d7179c1bca4e73de2c
Author: vinayak hegde <vinayakph...@gmail.com>
AuthorDate: Mon Apr 22 15:23:30 2024 +0530

    HBASE-28466 Integration of time-based priority logic of bucket cache in prefetch functionality of HBase (#5808)
    
    Signed-off-by: Wellington Chevreuil <wchevre...@apache.org>
    Change-Id: I374a9ab88807da3188962d7ac0b2b727eaffd010
---
 .../apache/hadoop/hbase/io/hfile/BlockCache.java   |  5 +-
 .../hadoop/hbase/io/hfile/CombinedBlockCache.java  |  6 +-
 .../apache/hadoop/hbase/io/hfile/HFileInfo.java    |  6 ++
 .../hadoop/hbase/io/hfile/HFilePreadReader.java    |  2 +
 .../hadoop/hbase/io/hfile/bucket/BucketCache.java  | 15 +++-
 .../hbase/regionserver/DataTieringManager.java     | 91 +++++++++++++++++-----
 .../hbase/regionserver/TestDataTieringManager.java | 57 ++++++++++++--
 7 files changed, 150 insertions(+), 32 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
index 8f12e367e58..8380fc194e7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
@@ -184,11 +184,12 @@ public interface BlockCache extends Iterable<CachedBlock>, ConfigurationObserver
    * overridden by all implementing classes. In such cases, the returned Optional will be empty. For
    * subclasses implementing this logic, the returned Optional would contain the boolean value
    * reflecting if the passed file should indeed be cached.
-   * @param fileName to check if it should be cached.
+   * @param hFileInfo Information about the file to check if it should be cached.
+   * @param conf      The configuration object to use for determining caching behavior.
    * @return empty optional if this method is not supported, otherwise the returned optional
    *         contains the boolean value informing if the file should be cached.
    */
-  default Optional<Boolean> shouldCacheFile(String fileName) {
+  default Optional<Boolean> shouldCacheFile(HFileInfo hFileInfo, Configuration conf) {
     return Optional.empty();
   }
 
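The new default method lets a BlockCache implementation veto caching on a
per-file basis using the file's metadata plus the active configuration,
instead of just the file name. A minimal sketch of an override, assuming only
the APIs visible in this hunk (the class name and the "example.*" config key
are hypothetical):

    import java.util.Optional;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.io.hfile.HFileInfo;

    public class ExampleBlockCachePolicy {
      // Same shape as BlockCache#shouldCacheFile(HFileInfo, Configuration):
      // empty means "no opinion", otherwise the boolean is the caching verdict.
      Optional<Boolean> shouldCacheFile(HFileInfo hFileInfo, Configuration conf) {
        if (hFileInfo == null || hFileInfo.getHFileContext() == null) {
          // Metadata not initialized yet, so stay undecided.
          return Optional.empty();
        }
        // Illustrative policy: honor a hypothetical on/off switch.
        return Optional.of(conf.getBoolean("example.cache.on.open.enabled", true));
      }
    }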
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
index 61976a86f83..fe675aade7b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
@@ -477,9 +477,9 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize {
   }
 
   @Override
-  public Optional<Boolean> shouldCacheFile(String fileName) {
-    Optional<Boolean> l1Result = l1Cache.shouldCacheFile(fileName);
-    Optional<Boolean> l2Result = l2Cache.shouldCacheFile(fileName);
+  public Optional<Boolean> shouldCacheFile(HFileInfo hFileInfo, Configuration conf) {
+    Optional<Boolean> l1Result = l1Cache.shouldCacheFile(hFileInfo, conf);
+    Optional<Boolean> l2Result = l2Cache.shouldCacheFile(hFileInfo, conf);
     final Mutable<Boolean> combinedResult = new MutableBoolean(true);
     l1Result.ifPresent(b -> combinedResult.setValue(b && combinedResult.getValue()));
     l2Result.ifPresent(b -> combinedResult.setValue(b && combinedResult.getValue()));
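The combined verdict is a logical AND over whichever caches express an
opinion; when neither does, the result defaults to true. Restated in
isolation (a sketch equivalent to the Mutable-based code above, assuming the
method ends by wrapping combinedResult in an Optional):

    import java.util.Optional;

    public class CombineSketch {
      static Optional<Boolean> combine(Optional<Boolean> l1, Optional<Boolean> l2) {
        // A cache with no opinion contributes "true" (the neutral element of AND).
        return Optional.of(l1.orElse(true) && l2.orElse(true));
      }

      public static void main(String[] args) {
        System.out.println(combine(Optional.empty(), Optional.empty()));    // Optional[true]
        System.out.println(combine(Optional.of(true), Optional.of(false))); // Optional[false]
      }
    }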
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileInfo.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileInfo.java
index e1623537385..b3ccd248780 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileInfo.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileInfo.java
@@ -121,6 +121,7 @@ public class HFileInfo implements SortedMap<byte[], byte[]> {
 
   private FixedFileTrailer trailer;
   private HFileContext hfileContext;
+  private boolean initialized = false;
 
   public HFileInfo() {
     super();
@@ -361,6 +362,10 @@ public class HFileInfo implements SortedMap<byte[], byte[]> {
    * should be called after initTrailerAndContext
    */
   public void initMetaAndIndex(HFile.Reader reader) throws IOException {
+    if (initialized) {
+      return;
+    }
+
     ReaderContext context = reader.getContext();
     try {
       HFileBlock.FSReader blockReader = reader.getUncachedBlockReader();
@@ -398,6 +403,7 @@ public class HFileInfo implements SortedMap<byte[], byte[]> {
       throw new CorruptHFileException(
         "Problem reading data index and meta index from file " + 
context.getFilePath(), t);
     }
+    initialized = true;
   }
 
   private HFileContext createHFileContext(Path path, FixedFileTrailer trailer, Configuration conf)
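The new initialized flag makes initMetaAndIndex idempotent, which matters
because HFilePreadReader (next hunk) now calls it eagerly from its
constructor while the regular open path may call it again. Note the flag is
only set after the try/catch completes, so a failed initialization can still
be retried. The guard pattern in isolation (a generic sketch, not HBase API):

    import java.io.IOException;

    class IdempotentInit {
      private boolean initialized = false;

      void init() throws IOException {
        if (initialized) {
          return; // second and later calls are no-ops
        }
        // ... expensive one-time work goes here; if it throws, the flag
        // stays false and a subsequent call will retry ...
        initialized = true;
      }
    }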
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePreadReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePreadReader.java
index 08649ebb315..3ef5f50db02 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePreadReader.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePreadReader.java
@@ -35,6 +35,8 @@ public class HFilePreadReader extends HFileReaderImpl {
   public HFilePreadReader(ReaderContext context, HFileInfo fileInfo, CacheConfig cacheConf,
     Configuration conf) throws IOException {
     super(context, fileInfo, cacheConf, conf);
+    // Initialize HFileInfo object with metadata for caching decisions
+    fileInfo.initMetaAndIndex(this);
     // master hosted regions, like the master procedures store wouldn't have a block cache
     // Prefetch file blocks upon open if requested
     if (cacheConf.getBlockCache().isPresent() && cacheConf.shouldPrefetchOnOpen()) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
index 28b2b05936c..569a04bda8f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -79,9 +79,11 @@ import org.apache.hadoop.hbase.io.hfile.CachedBlock;
 import org.apache.hadoop.hbase.io.hfile.CombinedBlockCache;
 import org.apache.hadoop.hbase.io.hfile.HFileBlock;
 import org.apache.hadoop.hbase.io.hfile.HFileContext;
+import org.apache.hadoop.hbase.io.hfile.HFileInfo;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.nio.RefCnt;
 import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
+import org.apache.hadoop.hbase.regionserver.DataTieringManager;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -2317,7 +2319,18 @@ public class BucketCache implements BlockCache, HeapSize {
   }
 
   @Override
-  public Optional<Boolean> shouldCacheFile(String fileName) {
+  public Optional<Boolean> shouldCacheFile(HFileInfo hFileInfo, Configuration conf) {
+    String fileName = hFileInfo.getHFileContext().getHFileName();
+    try {
+      DataTieringManager dataTieringManager = DataTieringManager.getInstance();
+      if (!dataTieringManager.isHotData(hFileInfo, conf)) {
+        LOG.debug("Data tiering is enabled for file: '{}' and it is not hot data", fileName);
+        return Optional.of(false);
+      }
+    } catch (IllegalStateException e) {
+      LOG.error("Error while getting DataTieringManager instance: {}", e.getMessage());
+    }
+    }
+
     // if we don't have the file in fullyCachedFiles, we should cache it
     return Optional.of(!fullyCachedFiles.containsKey(fileName));
   }
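The decision order here: if the DataTieringManager is available and
classifies the file as cold, never cache it; if the manager is not
instantiated (IllegalStateException), fall through; otherwise cache the file
unless it is already fully cached. A sketch of that flow, with plain booleans
standing in for DataTieringManager.isHotData and the fullyCachedFiles lookup:

    public class ShouldCacheSketch {
      static boolean shouldCache(boolean tieringAvailable, boolean isHot,
          boolean alreadyFullyCached) {
        if (tieringAvailable && !isHot) {
          return false; // cold data is not admitted on prefetch
        }
        // Hot data, or no tiering manager: cache unless already fully cached.
        return !alreadyFullyCached;
      }

      public static void main(String[] args) {
        System.out.println(shouldCache(true, false, false));  // false: cold file
        System.out.println(shouldCache(true, true, true));    // false: already cached
        System.out.println(shouldCache(false, false, false)); // true: no tiering info
      }
    }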
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
index 2903963f706..b781052efa9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
@@ -17,6 +17,9 @@
  */
 package org.apache.hadoop.hbase.regionserver;
 
+import static org.apache.hadoop.hbase.regionserver.HStoreFile.TIMERANGE_KEY;
+
+import java.io.IOException;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.OptionalLong;
@@ -24,6 +27,7 @@ import java.util.Set;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
+import org.apache.hadoop.hbase.io.hfile.HFileInfo;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.yetus.audience.InterfaceAudience;
@@ -106,11 +110,13 @@ public class DataTieringManager {
   }
 
   /**
-   * Determines whether the data associated with the given block cache key is considered hot.
+   * Determines whether the data associated with the given block cache key is considered hot. If the
+   * data tiering type is set to {@link DataTieringType#TIME_RANGE} and maximum timestamp is not
+   * present, it considers {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by
+   * default.
    * @param key the block cache key
    * @return {@code true} if the data is hot, {@code false} otherwise
-   * @throws DataTieringException if there is an error retrieving data tiering information or the
-   *                              HFile maximum timestamp
+   * @throws DataTieringException if there is an error retrieving data tiering information
    */
   public boolean isHotData(BlockCacheKey key) throws DataTieringException {
     Path hFilePath = key.getFilePath();
@@ -122,37 +128,82 @@ public class DataTieringManager {
 
   /**
    * Determines whether the data in the HFile at the given path is considered hot based on the
-   * configured data tiering type and hot data age.
+   * configured data tiering type and hot data age. If the data tiering type is set to
+   * {@link DataTieringType#TIME_RANGE} and maximum timestamp is not present, it considers
+   * {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by default.
    * @param hFilePath the path to the HFile
    * @return {@code true} if the data is hot, {@code false} otherwise
-   * @throws DataTieringException if there is an error retrieving data tiering information or the
-   *                              HFile maximum timestamp
+   * @throws DataTieringException if there is an error retrieving data tiering information
    */
   public boolean isHotData(Path hFilePath) throws DataTieringException {
     Configuration configuration = getConfiguration(hFilePath);
     DataTieringType dataTieringType = getDataTieringType(configuration);
 
     if (dataTieringType.equals(DataTieringType.TIME_RANGE)) {
-      long hotDataAge = getDataTieringHotDataAge(configuration);
-
-      HStoreFile hStoreFile = getHStoreFile(hFilePath);
-      if (hStoreFile == null) {
-        LOG.error("HStoreFile corresponding to " + hFilePath + " doesn't 
exist");
-        return false;
-      }
-      OptionalLong maxTimestamp = hStoreFile.getMaximumTimestamp();
-      if (!maxTimestamp.isPresent()) {
-        throw new DataTieringException("Maximum timestamp not present for " + 
hFilePath);
-      }
+      return hotDataValidator(getMaxTimestamp(hFilePath), 
getDataTieringHotDataAge(configuration));
+    }
+    // DataTieringType.NONE or other types are considered hot by default
+    return true;
+  }
 
-      long currentTimestamp = 
EnvironmentEdgeManager.getDelegate().currentTime();
-      long diff = currentTimestamp - maxTimestamp.getAsLong();
-      return diff <= hotDataAge;
+  /**
+   * Determines whether the data in the HFile being read is considered hot based on the configured
+   * data tiering type and hot data age. If the data tiering type is set to
+   * {@link DataTieringType#TIME_RANGE} and maximum timestamp is not present, it considers
+   * {@code Long.MAX_VALUE} as the maximum timestamp, making the data hot by default.
+   * @param hFileInfo     Information about the HFile to determine if its data is hot.
+   * @param configuration The configuration object to use for determining hot data criteria.
+   * @return {@code true} if the data is hot, {@code false} otherwise
+   */
+  public boolean isHotData(HFileInfo hFileInfo, Configuration configuration) {
+    DataTieringType dataTieringType = getDataTieringType(configuration);
+    if (dataTieringType.equals(DataTieringType.TIME_RANGE)) {
+      return hotDataValidator(getMaxTimestamp(hFileInfo), getDataTieringHotDataAge(configuration));
     }
     // DataTieringType.NONE or other types are considered hot by default
     return true;
   }
 
+  private boolean hotDataValidator(long maxTimestamp, long hotDataAge) {
+    long currentTimestamp = getCurrentTimestamp();
+    long diff = currentTimestamp - maxTimestamp;
+    return diff <= hotDataAge;
+  }
+
+  private long getMaxTimestamp(Path hFilePath) throws DataTieringException {
+    HStoreFile hStoreFile = getHStoreFile(hFilePath);
+    if (hStoreFile == null) {
+      LOG.error("HStoreFile corresponding to " + hFilePath + " doesn't exist");
+      return Long.MAX_VALUE;
+    }
+    OptionalLong maxTimestamp = hStoreFile.getMaximumTimestamp();
+    if (!maxTimestamp.isPresent()) {
+      LOG.error("Maximum timestamp not present for " + hFilePath);
+      return Long.MAX_VALUE;
+    }
+    return maxTimestamp.getAsLong();
+  }
+
+  private long getMaxTimestamp(HFileInfo hFileInfo) {
+    try {
+      byte[] hFileTimeRange = hFileInfo.get(TIMERANGE_KEY);
+      if (hFileTimeRange == null) {
+        LOG.error("Timestamp information not found for file: {}",
+          hFileInfo.getHFileContext().getHFileName());
+        return Long.MAX_VALUE;
+      }
+      return TimeRangeTracker.parseFrom(hFileTimeRange).getMax();
+    } catch (IOException e) {
+      LOG.error("Error occurred while reading the timestamp metadata of file: 
{}",
+        hFileInfo.getHFileContext().getHFileName(), e);
+      return Long.MAX_VALUE;
+    }
+  }
+
+  private long getCurrentTimestamp() {
+    return EnvironmentEdgeManager.getDelegate().currentTime();
+  }
+
   /**
    * Returns a set of cold data filenames from the given set of cached blocks. Cold data is
    * determined by the configured data tiering type and hot data age.
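The TIME_RANGE check reduces to: a file is hot when the age of its newest
cell (current time minus the file's maximum timestamp) does not exceed the
configured hot-data age. Mapping a missing timestamp to Long.MAX_VALUE makes
the subtraction yield a hugely negative age, so such files pass the check and
are treated as hot by default. The arithmetic in isolation (a sketch with
plain longs, not the HBase API):

    public class HotDataSketch {
      static boolean isHot(long maxTimestamp, long hotDataAge, long now) {
        // Negative ages (future or missing-but-defaulted timestamps) always pass.
        return (now - maxTimestamp) <= hotDataAge;
      }

      public static void main(String[] args) {
        long now = System.currentTimeMillis();
        long day = 24L * 60 * 60 * 1000;
        System.out.println(isHot(now - day, 7 * day, now));      // true: 1 day old
        System.out.println(isHot(now - 30 * day, 7 * day, now)); // false: cold
        System.out.println(isHot(Long.MAX_VALUE, 7 * day, now)); // true: hot by default
      }
    }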
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java
index 54853944583..3e99d453d5b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java
@@ -17,7 +17,9 @@
  */
 package org.apache.hadoop.hbase.regionserver;
 
+import static org.apache.hadoop.hbase.HConstants.BUCKET_CACHE_SIZE_KEY;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -26,14 +28,17 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
+import java.util.Optional;
 import java.util.Set;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.Waiter;
 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
 import org.apache.hadoop.hbase.client.RegionInfo;
@@ -53,6 +58,7 @@ import org.apache.hadoop.hbase.testclassification.RegionServerTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.CommonFSUtils;
+import org.apache.hadoop.hbase.util.Pair;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
 import org.junit.Test;
@@ -87,9 +93,11 @@ public class TestDataTieringManager {
   private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
   private static Configuration defaultConf;
   private static FileSystem fs;
+  private static BlockCache blockCache;
   private static CacheConfig cacheConf;
   private static Path testDir;
   private static final Map<String, HRegion> testOnlineRegions = new HashMap<>();
+
   private static DataTieringManager dataTieringManager;
   private static final List<HStoreFile> hStoreFiles = new ArrayList<>();
 
@@ -104,11 +112,14 @@ public class TestDataTieringManager {
   public static void setupBeforeClass() throws Exception {
     testDir = TEST_UTIL.getDataTestDir(TestDataTieringManager.class.getSimpleName());
     defaultConf = TEST_UTIL.getConfiguration();
+    defaultConf.setBoolean(CacheConfig.PREFETCH_BLOCKS_ON_OPEN_KEY, true);
+    defaultConf.setStrings(HConstants.BUCKET_CACHE_IOENGINE_KEY, "offheap");
+    defaultConf.setLong(BUCKET_CACHE_SIZE_KEY, 32);
     fs = HFileSystem.get(defaultConf);
-    BlockCache blockCache = BlockCacheFactory.createBlockCache(defaultConf);
+    blockCache = BlockCacheFactory.createBlockCache(defaultConf);
     cacheConf = new CacheConfig(defaultConf, blockCache);
-    setupOnlineRegions();
     DataTieringManager.instantiate(testOnlineRegions);
+    setupOnlineRegions();
     dataTieringManager = DataTieringManager.getInstance();
   }
 
@@ -197,7 +208,30 @@ public class TestDataTieringManager {
 
     // Test with a filename where corresponding HStoreFile in not present
     hFilePath = new Path(hStoreFiles.get(0).getPath().getParent(), "incorrectFileName");
-    testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, false);
+    testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, true);
+  }
+
+  @Test
+  public void testPrefetchWhenDataTieringEnabled() throws IOException {
+    setPrefetchBlocksOnOpen();
+    initializeTestEnvironment();
+    // Evict blocks from cache by closing the files and passing evict on close.
+    // Then initialize the reader again. Since Prefetch on open is set to true, it should prefetch
+    // those blocks.
+    for (HStoreFile file : hStoreFiles) {
+      file.closeStoreFile(true);
+      file.initReader();
+    }
+
+    // Since we have one cold file among four files, only three should get prefetched.
+    Optional<Map<String, Pair<String, Long>>> fullyCachedFiles = blockCache.getFullyCachedFiles();
+    assertTrue("We should get the fully cached files from the cache", fullyCachedFiles.isPresent());
+    Waiter.waitFor(defaultConf, 10000, () -> fullyCachedFiles.get().size() == 3);
+    assertEquals("Number of fully cached files are incorrect", 3, fullyCachedFiles.get().size());
+  }
+
+  private void setPrefetchBlocksOnOpen() {
+    defaultConf.setBoolean(CacheConfig.PREFETCH_BLOCKS_ON_OPEN_KEY, true);
   }
 
   @Test
@@ -278,6 +312,17 @@ public class TestDataTieringManager {
     testDataTieringMethodWithKey(caller, key, expectedResult, null);
   }
 
+  private static void initializeTestEnvironment() throws IOException {
+    setupFileSystemAndCache();
+    setupOnlineRegions();
+  }
+
+  private static void setupFileSystemAndCache() throws IOException {
+    fs = HFileSystem.get(defaultConf);
+    blockCache = BlockCacheFactory.createBlockCache(defaultConf);
+    cacheConf = new CacheConfig(defaultConf, blockCache);
+  }
+
   private static void setupOnlineRegions() throws IOException {
     testOnlineRegions.clear();
     hStoreFiles.clear();
@@ -376,11 +421,11 @@ public class TestDataTieringManager {
 
     writeStoreFileRandomData(storeFileWriter, Bytes.toBytes(columnFamily), timestamp);
 
-    StoreContext storeContext = StoreContext.getBuilder().withRegionFileSystem(regionFs).build();
+    StoreContext storeContext =
+      StoreContext.getBuilder().withRegionFileSystem(regionFs).build();
 
     StoreFileTracker sft = StoreFileTrackerFactory.create(conf, true, storeContext);
-    return new HStoreFile(fs, storeFileWriter.getPath(), conf, cacheConf, BloomType.NONE, true,
-      sft);
+    return new HStoreFile(fs, storeFileWriter.getPath(), conf, cacheConf, BloomType.NONE, true, sft);
   }
 
   private static void writeStoreFileRandomData(final StoreFileWriter writer, byte[] columnFamily,
