This is an automated email from the ASF dual-hosted git repository.

wchevreuil pushed a commit to branch HBASE-29427
in repository https://gitbox.apache.org/repos/asf/hbase.git
commit c79d3f30ef9e40298026f8aa98de6cee48a50508
Author: Janardhan Hungund <[email protected]>
AuthorDate: Wed Nov 13 18:20:21 2024 +0530

    HBASE-29414 Refactor DataTieringManager to make priority logic pluggable

    Co-authored-by: Wellington Ramos Chevreuil <[email protected]>
---
 .../apache/hadoop/hbase/io/hfile/BlockCache.java   |   2 +-
 .../hadoop/hbase/io/hfile/CombinedBlockCache.java  |   6 +-
 .../org/apache/hadoop/hbase/io/hfile/HFile.java    |   7 ++
 .../hadoop/hbase/io/hfile/HFileReaderImpl.java     |   2 +-
 .../hadoop/hbase/io/hfile/HFileWriterImpl.java     |  10 +-
 .../hadoop/hbase/io/hfile/bucket/BucketCache.java  |   5 +-
 .../hadoop/hbase/regionserver/CellTSTiering.java   |  37 +++++++
 .../hbase/regionserver/CustomCellValueTiering.java |  36 +++++++
 .../hadoop/hbase/regionserver/DataTiering.java     |  12 +++
 .../hbase/regionserver/DataTieringManager.java     |  76 +++++---------
 .../hadoop/hbase/regionserver/DataTieringType.java |  11 +-
 .../hadoop/hbase/regionserver/StoreFileWriter.java |  15 ++-
 ....java => TestCustomCellDataTieringManager.java} | 115 ++++++++++-----------
 .../hbase/regionserver/TestDataTieringManager.java |  12 ++-
 14 files changed, 221 insertions(+), 125 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
index cef5a6488fa..71121c85991 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
@@ -220,7 +220,7 @@ public interface BlockCache extends Iterable<CachedBlock>, ConfigurationObserver
    * @return An empty Optional if this method is not supported; otherwise, the returned Optional
    *         contains the boolean value indicating if the block should be cached.
    */
-  default Optional<Boolean> shouldCacheBlock(BlockCacheKey key, TimeRangeTracker timeRangeTracker,
+  default Optional<Boolean> shouldCacheBlock(BlockCacheKey key, long maxTimeStamp,
     Configuration conf) {
     return Optional.empty();
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
index 672a7bc1e72..ae763e1d5be 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
@@ -494,10 +494,10 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize {
   }
 
   @Override
-  public Optional<Boolean> shouldCacheBlock(BlockCacheKey key, TimeRangeTracker timeRangeTracker,
+  public Optional<Boolean> shouldCacheBlock(BlockCacheKey key, long maxTimeStamp,
     Configuration conf) {
-    return combineCacheResults(l1Cache.shouldCacheBlock(key, timeRangeTracker, conf),
-      l2Cache.shouldCacheBlock(key, timeRangeTracker, conf));
+    return combineCacheResults(l1Cache.shouldCacheBlock(key, maxTimeStamp, conf),
+      l2Cache.shouldCacheBlock(key, maxTimeStamp, conf));
   }
 
   private Optional<Boolean> combineCacheResults(Optional<Boolean> result1,
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
index d8dffce59e8..a99eac4085e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
@@ -43,6 +43,7 @@ import org.apache.hadoop.hbase.io.hfile.ReaderContext.ReaderType;
 import org.apache.hadoop.hbase.ipc.RpcServer;
 import org.apache.hadoop.hbase.regionserver.CellSink;
 import org.apache.hadoop.hbase.regionserver.ShipperListener;
+import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
 import org.apache.hadoop.hbase.util.BloomFilterWriter;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSUtils;
@@ -217,6 +218,12 @@ public final class HFile {
      */
     void appendTrackedTimestampsToMetadata() throws IOException;
 
+    /**
+     * Add Custom cell timestamp to Metadata
+     */
+    public void appendCustomCellTimestampsToMetadata(TimeRangeTracker timeRangeTracker)
+      throws IOException;
+
     /** Returns the path to this {@link HFile} */
     Path getPath();
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
index 4b60ef662c2..972e8070e1c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
@@ -1380,7 +1380,7 @@ public abstract class HFileReaderImpl implements HFile.Reader, Configurable {
       HFileBlock unpackedNoChecksum = BlockCacheUtil.getBlockForCaching(cacheConf, unpacked);
       // Cache the block if necessary
       cacheConf.getBlockCache().ifPresent(cache -> {
-        if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) {
+        if (cacheBlock && cacheOnRead) {
           // Using the wait on cache during compaction and prefetching.
           cache.cacheBlock(cacheKey, cacheCompressed
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
index 8d58032799d..5e51be72d2d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hbase.io.hfile;
 
 import static org.apache.hadoop.hbase.io.hfile.BlockCompressedSizePredicator.MAX_BLOCK_SIZE_UNCOMPRESSED;
+import static org.apache.hadoop.hbase.regionserver.CustomTieringMultiFileWriter.TIERING_CELL_TIME_RANGE;
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.EARLIEST_PUT_TS;
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.TIMERANGE_KEY;
 
@@ -598,7 +599,7 @@ public class HFileWriterImpl implements HFile.Writer {
   }
 
   private boolean shouldCacheBlock(BlockCache cache, BlockCacheKey key) {
-    Optional<Boolean> result = cache.shouldCacheBlock(key, timeRangeToTrack.get(), conf);
+    Optional<Boolean> result = cache.shouldCacheBlock(key, timeRangeToTrack.get().getMax(), conf);
     return result.orElse(true);
   }
 
@@ -909,6 +910,13 @@ public class HFileWriterImpl implements HFile.Writer {
     appendFileInfo(EARLIEST_PUT_TS, Bytes.toBytes(earliestPutTs));
   }
 
+  public void appendCustomCellTimestampsToMetadata(TimeRangeTracker timeRangeTracker)
+    throws IOException {
+    // TODO: The StoreFileReader always converts the byte[] to TimeRange
+    // via TimeRangeTracker, so we should write the serialization data of TimeRange directly.
+    appendFileInfo(TIERING_CELL_TIME_RANGE, TimeRangeTracker.toByteArray(timeRangeTracker));
+  }
+
   /**
    * Record the earliest Put timestamp. If the timeRangeTracker is not set, update TimeRangeTracker
    * to include the timestamp of this key
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
index 2af46a49d89..21956e6d40a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -86,7 +86,6 @@ import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
 import org.apache.hadoop.hbase.regionserver.DataTieringManager;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
-import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.IdReadWriteLock;
@@ -2458,10 +2457,10 @@ public class BucketCache implements BlockCache, HeapSize {
   }
 
   @Override
-  public Optional<Boolean> shouldCacheBlock(BlockCacheKey key, TimeRangeTracker timeRangeTracker,
+  public Optional<Boolean> shouldCacheBlock(BlockCacheKey key, long maxTimestamp,
     Configuration conf) {
     DataTieringManager dataTieringManager = DataTieringManager.getInstance();
-    if (dataTieringManager != null && !dataTieringManager.isHotData(timeRangeTracker, conf)) {
+    if (dataTieringManager != null && !dataTieringManager.isHotData(maxTimestamp, conf)) {
       LOG.debug("Data tiering is enabled for file: '{}' and it is not hot data",
         key.getHfileName());
       return Optional.of(false);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellTSTiering.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellTSTiering.java
new file mode 100644
index 00000000000..0e66a546610
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellTSTiering.java
@@ -0,0 +1,37 @@
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.hbase.io.hfile.HFileInfo;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.io.IOException;
+import java.util.OptionalLong;
+import static org.apache.hadoop.hbase.regionserver.HStoreFile.TIMERANGE_KEY;
+
[email protected]
+public class CellTSTiering implements DataTiering {
+  private static final Logger LOG = LoggerFactory.getLogger(CellTSTiering.class);
+  public long getTimestamp(HStoreFile hStoreFile) {
+    OptionalLong maxTimestamp = hStoreFile.getMaximumTimestamp();
+    if (!maxTimestamp.isPresent()) {
+      LOG.info("Maximum timestamp not present for {}", hStoreFile.getPath());
+      return Long.MAX_VALUE;
+    }
+    return maxTimestamp.getAsLong();
+  }
+  public long getTimestamp(HFileInfo hFileInfo) {
+    try {
+      byte[] hFileTimeRange = hFileInfo.get(TIMERANGE_KEY);
+      if (hFileTimeRange == null) {
+        LOG.info("Timestamp information not found for file: {}",
+          hFileInfo.getHFileContext().getHFileName());
+        return Long.MAX_VALUE;
+      }
+      return TimeRangeTracker.parseFrom(hFileTimeRange).getMax();
+    } catch (IOException e) {
+      LOG.error("Error occurred while reading the timestamp metadata of file: {}",
+        hFileInfo.getHFileContext().getHFileName(), e);
+      return Long.MAX_VALUE;
+    }
+  }
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CustomCellValueTiering.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CustomCellValueTiering.java
new file mode 100644
index 00000000000..9d367c655e3
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CustomCellValueTiering.java
@@ -0,0 +1,36 @@
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.io.hfile.HFileInfo;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.io.IOException;
+import java.util.OptionalLong;
+import static org.apache.hadoop.hbase.regionserver.CustomTieringMultiFileWriter.TIERING_CELL_TIME_RANGE;
+
[email protected]
+public class CustomCellValueTiering implements DataTiering {
+  private static final Logger LOG = LoggerFactory.getLogger(CustomCellValueTiering.class);
+  private long getMaxTSFromTimeRange(byte[] hFileTimeRange, String hFileName) {
+    try {
+      if (hFileTimeRange == null) {
+        LOG.info("Custom cell-based timestamp information not found for file: {}", hFileName);
+        return Long.MAX_VALUE;
+      }
+      return TimeRangeTracker.parseFrom(hFileTimeRange).getMax();
+    } catch (IOException e) {
+      LOG.error("Error occurred while reading the Custom cell-based timestamp metadata of file: {}",
+        hFileName, e);
+      return Long.MAX_VALUE;
+    }
+  }
+  public long getTimestamp(HStoreFile hStoreFile) {
+    return getMaxTSFromTimeRange(hStoreFile.getMetadataValue(TIERING_CELL_TIME_RANGE),
+      hStoreFile.getPath().getName());
+  }
+  public long getTimestamp(HFileInfo hFileInfo) {
+    return getMaxTSFromTimeRange(hFileInfo.get(TIERING_CELL_TIME_RANGE),
+      hFileInfo.getHFileContext().getHFileName());
+  }
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTiering.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTiering.java
new file mode 100644
index 00000000000..a1a0984836d
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTiering.java
@@ -0,0 +1,12 @@
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.io.hfile.HFileInfo;
+import org.apache.yetus.audience.InterfaceAudience;
+
[email protected]
+public interface DataTiering {
+  long getTimestamp(HStoreFile hFile);
+  long getTimestamp(HFileInfo hFileInfo);
+
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
index aa56e3f6444..ee32327a1e3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hbase.regionserver;
 
+import static org.apache.hadoop.hbase.regionserver.HStoreFile.NULL_VALUE;
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.TIMERANGE_KEY;
 
 import java.io.IOException;
@@ -25,6 +26,7 @@ import java.util.HashSet;
 import java.util.Map;
 import java.util.OptionalLong;
 import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
@@ -136,17 +138,18 @@ public class DataTieringManager {
    * the data tiering type is set to {@link DataTieringType#TIME_RANGE}, it uses the maximum
    * timestamp from the time range tracker to determine if the data is hot. Otherwise, it considers
    * the data as hot by default.
-   * @param timeRangeTracker the time range tracker containing the timestamps
+   * @param maxTimestamp the maximum timestamp associated with the data.
    * @param conf The configuration object to use for determining hot data criteria.
   * @return {@code true} if the data is hot, {@code false} otherwise
    */
-  public boolean isHotData(TimeRangeTracker timeRangeTracker, Configuration conf) {
+  public boolean isHotData(long maxTimestamp, Configuration conf) {
     DataTieringType dataTieringType = getDataTieringType(conf);
+
     if (
-      dataTieringType.equals(DataTieringType.TIME_RANGE)
-        && timeRangeTracker.getMax() != TimeRangeTracker.INITIAL_MAX_TIMESTAMP
+      !dataTieringType.equals(DataTieringType.NONE)
+        && maxTimestamp != TimeRangeTracker.INITIAL_MAX_TIMESTAMP
     ) {
-      return hotDataValidator(timeRangeTracker.getMax(), getDataTieringHotDataAge(conf));
+      return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(conf));
     }
     // DataTieringType.NONE or other types are considered hot by default
     return true;
@@ -165,8 +168,13 @@ public class DataTieringManager {
     Configuration configuration = getConfiguration(hFilePath);
     DataTieringType dataTieringType = getDataTieringType(configuration);
 
-    if (dataTieringType.equals(DataTieringType.TIME_RANGE)) {
-      return hotDataValidator(getMaxTimestamp(hFilePath), getDataTieringHotDataAge(configuration));
+    if (!dataTieringType.equals(DataTieringType.NONE)) {
+      HStoreFile hStoreFile = getHStoreFile(hFilePath);
+      if (hStoreFile == null) {
+        throw new DataTieringException("Store file corresponding to " + hFilePath + " doesn't exist");
+      }
+      return hotDataValidator(dataTieringType.instance.getTimestamp(getHStoreFile(hFilePath)),
+        getDataTieringHotDataAge(configuration));
     }
     // DataTieringType.NONE or other types are considered hot by default
     return true;
@@ -181,17 +189,18 @@ public class DataTieringManager {
    * @param maxTimestamp the maximum timestamp to validate against
    * @return {@code true} if the data is hot, {@code false} otherwise
    * @throws DataTieringException if there is an error retrieving data tiering information
-   */
+
   public boolean isHotData(Path hFilePath, long maxTimestamp) throws DataTieringException {
     Configuration configuration = getConfiguration(hFilePath);
     DataTieringType dataTieringType = getDataTieringType(configuration);
 
-    if (dataTieringType.equals(DataTieringType.TIME_RANGE)) {
+    if (!dataTieringType.equals(DataTieringType.NONE)) {
       return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(configuration));
     }
     // DataTieringType.NONE or other types are considered hot by default
     return true;
   }
+   */
 
   /**
    * Determines whether the data in the HFile being read is considered hot based on the configured
@@ -204,8 +213,9 @@
    */
   public boolean isHotData(HFileInfo hFileInfo, Configuration configuration) {
     DataTieringType dataTieringType = getDataTieringType(configuration);
-    if (dataTieringType.equals(DataTieringType.TIME_RANGE)) {
-      return hotDataValidator(getMaxTimestamp(hFileInfo), getDataTieringHotDataAge(configuration));
+    if (hFileInfo != null && !dataTieringType.equals(DataTieringType.NONE)) {
+      return hotDataValidator(dataTieringType.instance.getTimestamp(hFileInfo),
+        getDataTieringHotDataAge(configuration));
     }
     // DataTieringType.NONE or other types are considered hot by default
     return true;
@@ -217,36 +227,6 @@
     return diff <= hotDataAge;
   }
 
-  private long getMaxTimestamp(Path hFilePath) throws DataTieringException {
-    HStoreFile hStoreFile = getHStoreFile(hFilePath);
-    if (hStoreFile == null) {
-      LOG.error("HStoreFile corresponding to {} doesn't exist", hFilePath);
-      return Long.MAX_VALUE;
-    }
-    OptionalLong maxTimestamp = hStoreFile.getMaximumTimestamp();
-    if (!maxTimestamp.isPresent()) {
-      LOG.error("Maximum timestamp not present for {}", hFilePath);
-      return Long.MAX_VALUE;
-    }
-    return maxTimestamp.getAsLong();
-  }
-
-  private long getMaxTimestamp(HFileInfo hFileInfo) {
-    try {
-      byte[] hFileTimeRange = hFileInfo.get(TIMERANGE_KEY);
-      if (hFileTimeRange == null) {
-        LOG.error("Timestamp information not found for file: {}",
-          hFileInfo.getHFileContext().getHFileName());
-        return Long.MAX_VALUE;
-      }
-      return TimeRangeTracker.parseFrom(hFileTimeRange).getMax();
-    } catch (IOException e) {
-      LOG.error("Error occurred while reading the timestamp metadata of file: {}",
-        hFileInfo.getHFileContext().getHFileName(), e);
-      return Long.MAX_VALUE;
-    }
-  }
-
   private long getCurrentTimestamp() {
     return EnvironmentEdgeManager.getDelegate().currentTime();
   }
@@ -299,7 +279,7 @@ public class DataTieringManager {
   private HStoreFile getHStoreFile(Path hFilePath) throws DataTieringException {
     HStore hStore = getHStore(hFilePath);
     for (HStoreFile file : hStore.getStorefiles()) {
-      if (file.getPath().equals(hFilePath)) {
+      if (file.getPath().toUri().getPath().toString().equals(hFilePath.toString())) {
         return file;
       }
     }
@@ -330,7 +310,8 @@ public class DataTieringManager {
     for (HRegion r : this.onlineRegions.values()) {
       for (HStore hStore : r.getStores()) {
         Configuration conf = hStore.getReadOnlyConfiguration();
-        if (getDataTieringType(conf) != DataTieringType.TIME_RANGE) {
+        DataTieringType dataTieringType = getDataTieringType(conf);
+        if (dataTieringType == DataTieringType.NONE) {
           // Data-Tiering not enabled for the store. Just skip it.
           continue;
         }
@@ -339,14 +320,9 @@ public class DataTieringManager {
         for (HStoreFile hStoreFile : hStore.getStorefiles()) {
           String hFileName =
             hStoreFile.getFileInfo().getHFileInfo().getHFileContext().getHFileName();
-          OptionalLong maxTimestamp = hStoreFile.getMaximumTimestamp();
-          if (!maxTimestamp.isPresent()) {
-            LOG.warn("maxTimestamp missing for file: {}",
-              hStoreFile.getFileInfo().getActiveFileName());
-            continue;
-          }
+          long maxTimeStamp = dataTieringType.instance.getTimestamp(hStoreFile);
           long currentTimestamp = EnvironmentEdgeManager.getDelegate().currentTime();
-          long fileAge = currentTimestamp - maxTimestamp.getAsLong();
+          long fileAge = currentTimestamp - maxTimeStamp;
           if (fileAge > hotDataAge) {
             // Values do not matter.
             coldFiles.put(hFileName, null);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringType.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringType.java
index ee54576a648..88c03a9e610 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringType.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringType.java
@@ -17,10 +17,17 @@
  */
 package org.apache.hadoop.hbase.regionserver;
 
+import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
 import org.apache.yetus.audience.InterfaceAudience;
 
 @InterfaceAudience.Public
 public enum DataTieringType {
-  NONE,
-  TIME_RANGE
+  NONE(null),
+  TIME_RANGE(new CellTSTiering()),
+  CUSTOM_CELL_VALUE(new CustomCellValueTiering());
+
+  final DataTiering instance;
+  DataTieringType(DataTiering instance) {
+    this.instance = instance;
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java
index 569b9d3faa6..58d7fdf1778 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java
@@ -255,6 +255,14 @@ public class StoreFileWriter implements CellSink, ShipperListener {
     }
   }
 
+  public void appendCustomCellTimestampsToMetadata(TimeRangeTracker timeRangeTracker)
+    throws IOException {
+    liveFileWriter.appendCustomCellTimestampsToMetadata(timeRangeTracker);
+    if (historicalFileWriter != null) {
+      historicalFileWriter.appendCustomCellTimestampsToMetadata(timeRangeTracker);
+    }
+  }
+
   @Override
   public void beforeShipped() throws IOException {
     liveFileWriter.beforeShipped();
@@ -663,7 +671,12 @@ public class StoreFileWriter implements CellSink, ShipperListener {
       writer.appendTrackedTimestampsToMetadata();
     }
 
-    private void appendGeneralBloomfilter(final ExtendedCell cell) throws IOException {
+    public void appendCustomCellTimestampsToMetadata(TimeRangeTracker timeRangeTracker)
+      throws IOException {
+      writer.appendCustomCellTimestampsToMetadata(timeRangeTracker);
+    }
+
+    private void appendGeneralBloomfilter(final ExtendedCell cell) throws IOException {
       if (this.generalBloomFilterWriter != null) {
         /*
          * http://2.bp.blogspot.com/_Cib_A77V54U/StZMrzaKufI/AAAAAAAAADo/ZhK7bGoJdMQ/s400/KeyValue.
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCustomCellDataTieringManager.java
similarity index 92%
copy from hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java
copy to hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCustomCellDataTieringManager.java
index 37e0fe98e7d..9bf8138933a 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCustomCellDataTieringManager.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.regionserver;
 
 import static org.apache.hadoop.hbase.HConstants.BUCKET_CACHE_SIZE_KEY;
 import static org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.DEFAULT_ERROR_TOLERATION_DURATION;
+import static org.apache.hadoop.hbase.regionserver.TestDataTieringManager.createHStoreFile;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
@@ -79,30 +80,30 @@ import org.slf4j.LoggerFactory;
 /**
  * This class is used to test the functionality of the DataTieringManager.
  *
- * The mock online regions are stored in {@link TestDataTieringManager#testOnlineRegions}.
- * For all tests, the setup of {@link TestDataTieringManager#testOnlineRegions} occurs only once.
- * Please refer to {@link TestDataTieringManager#setupOnlineRegions()} for the structure.
- * Additionally, a list of all store files is maintained in {@link TestDataTieringManager#hStoreFiles}.
+ * The mock online regions are stored in {@link TestCustomCellDataTieringManager#testOnlineRegions}.
+ * For all tests, the setup of {@link TestCustomCellDataTieringManager#testOnlineRegions} occurs only once.
+ * Please refer to {@link TestCustomCellDataTieringManager#setupOnlineRegions()} for the structure.
+ * Additionally, a list of all store files is maintained in {@link TestCustomCellDataTieringManager#hStoreFiles}.
  * The characteristics of these store files are listed below:
  * @formatter:off
  * ## HStoreFile Information
  *
  * | HStoreFile       | Region             | Store               | DataTiering           | isHot |
  * |------------------|--------------------|---------------------|-----------------------|-------|
- * | hStoreFile0      | region1            | hStore11            | TIME_RANGE            | true  |
+ * | hStoreFile0      | region1            | hStore11            | CUSTOM_CELL_VALUE     | true  |
  * | hStoreFile1      | region1            | hStore12            | NONE                  | true  |
- * | hStoreFile2      | region2            | hStore21            | TIME_RANGE            | true  |
- * | hStoreFile3      | region2            | hStore22            | TIME_RANGE            | false |
+ * | hStoreFile2      | region2            | hStore21            | CUSTOM_CELL_VALUE     | true  |
+ * | hStoreFile3      | region2            | hStore22            | CUSTOM_CELL_VALUE     | false |
  * @formatter:on
  */
 @Category({ RegionServerTests.class, SmallTests.class })
-public class TestDataTieringManager {
+public class TestCustomCellDataTieringManager {
 
   @ClassRule
   public static final HBaseClassTestRule CLASS_RULE =
-    HBaseClassTestRule.forClass(TestDataTieringManager.class);
+    HBaseClassTestRule.forClass(TestCustomCellDataTieringManager.class);
 
-  private static final Logger LOG = LoggerFactory.getLogger(TestDataTieringManager.class);
+  private static final Logger LOG = LoggerFactory.getLogger(TestCustomCellDataTieringManager.class);
   private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
   private static final long DAY = 24 * 60 * 60 * 1000;
   private static Configuration defaultConf;
@@ -124,7 +125,7 @@ public class TestDataTieringManager {
 
   @BeforeClass
   public static void setupBeforeClass() throws Exception {
-    testDir = TEST_UTIL.getDataTestDir(TestDataTieringManager.class.getSimpleName());
+    testDir = TEST_UTIL.getDataTestDir(TestCustomCellDataTieringManager.class.getSimpleName());
     defaultConf = TEST_UTIL.getConfiguration();
     updateCommonConfigurations();
     assertTrue(DataTieringManager.instantiate(defaultConf, testOnlineRegions));
@@ -227,7 +228,8 @@ public class TestDataTieringManager {
 
     // Test with a filename where corresponding HStoreFile in not present
     hFilePath = new Path(hStoreFiles.get(0).getPath().getParent(), "incorrectFileName");
-    testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, true);
+    testDataTieringMethodWithPathExpectingException(methodCallerWithPath, hFilePath,
+      new DataTieringException("Store file corresponding to " + hFilePath + " doesn't exist"));
   }
 
   @Test
@@ -294,7 +296,7 @@
     setCacheCompactBlocksOnWrite();
     initializeTestEnvironment();
 
-    HRegion region = createHRegion("table3", getConfWithTimeRangeDataTieringEnabled(5 * DAY));
+    HRegion region = createHRegion("table3", getConfWithCustomCellDataTieringEnabled(5 * DAY));
     testCacheCompactedBlocksOnWrite(region, true);
   }
 
@@ -303,7 +305,7 @@
     setCacheCompactBlocksOnWrite();
     initializeTestEnvironment();
 
-    HRegion region = createHRegion("table3", getConfWithTimeRangeDataTieringEnabled(DAY));
+    HRegion region = createHRegion("table3", getConfWithCustomCellDataTieringEnabled(DAY));
     testCacheCompactedBlocksOnWrite(region, false);
   }
 
@@ -338,12 +340,11 @@
     Path storeDir = hStore.getStoreContext().getFamilyStoreDirectoryPath();
     Configuration configuration = hStore.getReadOnlyConfiguration();
 
-    createHStoreFile(storeDir, configuration, currentTime - 2 * DAY,
-      hStore.getHRegion().getRegionFileSystem());
-    createHStoreFile(storeDir, configuration, currentTime - 3 * DAY,
-      hStore.getHRegion().getRegionFileSystem());
-    createHStoreFile(storeDir, configuration, currentTime - 4 * DAY,
-      hStore.getHRegion().getRegionFileSystem());
+    HRegionFileSystem regionFS = hStore.getHRegion().getRegionFileSystem();
+
+    createHStoreFile(storeDir, configuration, currentTime - 2 * DAY, regionFS);
+    createHStoreFile(storeDir, configuration, currentTime - 3 * DAY, regionFS);
+    createHStoreFile(storeDir, configuration, currentTime - 4 * DAY, regionFS);
   }
 
   @Test
@@ -571,57 +572,46 @@
 
   @Test
   public void testCacheConfigShouldCacheFile() throws Exception {
-    // Evict the files from cache.
-    for (HStoreFile file : hStoreFiles) {
-      file.closeStoreFile(true);
-    }
+    initializeTestEnvironment();
 
     // Verify that the API shouldCacheFileBlock returns the result correctly.
     // hStoreFiles[0], hStoreFiles[1], hStoreFiles[2] are hot files.
     // hStoreFiles[3] is a cold file.
-    try {
-      assertTrue(cacheConf.shouldCacheBlockOnRead(BlockCategory.DATA,
-        hStoreFiles.get(0).getFileInfo().getHFileInfo(),
-        hStoreFiles.get(0).getFileInfo().getConf()));
-      assertTrue(cacheConf.shouldCacheBlockOnRead(BlockCategory.DATA,
-        hStoreFiles.get(1).getFileInfo().getHFileInfo(),
-        hStoreFiles.get(1).getFileInfo().getConf()));
-      assertTrue(cacheConf.shouldCacheBlockOnRead(BlockCategory.DATA,
-        hStoreFiles.get(2).getFileInfo().getHFileInfo(),
-        hStoreFiles.get(2).getFileInfo().getConf()));
-      assertFalse(cacheConf.shouldCacheBlockOnRead(BlockCategory.DATA,
-        hStoreFiles.get(3).getFileInfo().getHFileInfo(),
-        hStoreFiles.get(3).getFileInfo().getConf()));
-    } finally {
-      for (HStoreFile file : hStoreFiles) {
-        file.initReader();
-      }
-    }
+    assertTrue(cacheConf.shouldCacheBlockOnRead(BlockCategory.DATA,
+      hStoreFiles.get(0).getFileInfo().getHFileInfo(), hStoreFiles.get(0).getFileInfo().getConf()));
+    assertTrue(cacheConf.shouldCacheBlockOnRead(BlockCategory.DATA,
+      hStoreFiles.get(1).getFileInfo().getHFileInfo(), hStoreFiles.get(1).getFileInfo().getConf()));
+    assertTrue(cacheConf.shouldCacheBlockOnRead(BlockCategory.DATA,
+      hStoreFiles.get(2).getFileInfo().getHFileInfo(), hStoreFiles.get(2).getFileInfo().getConf()));
+    assertFalse(cacheConf.shouldCacheBlockOnRead(BlockCategory.DATA,
+      hStoreFiles.get(3).getFileInfo().getHFileInfo(), hStoreFiles.get(3).getFileInfo().getConf()));
   }
 
   @Test
   public void testCacheOnReadColdFile() throws Exception {
+    initializeTestEnvironment();
     // hStoreFiles[3] is a cold file. the blocks should not get loaded after a readBlock call.
     HStoreFile hStoreFile = hStoreFiles.get(3);
     BlockCacheKey cacheKey = new BlockCacheKey(hStoreFile.getPath(), 0, true, BlockType.DATA);
-    testCacheOnRead(hStoreFile, cacheKey, 23025, false);
+    testCacheOnRead(hStoreFile, cacheKey, -1, false);
   }
 
   @Test
   public void testCacheOnReadHotFile() throws Exception {
+    initializeTestEnvironment();
    // hStoreFiles[0] is a hot file. the blocks should get loaded after a readBlock call.
     HStoreFile hStoreFile = hStoreFiles.get(0);
     BlockCacheKey cacheKey =
       new BlockCacheKey(hStoreFiles.get(0).getPath(), 0, true, BlockType.DATA);
-    testCacheOnRead(hStoreFile, cacheKey, 23025, true);
+    testCacheOnRead(hStoreFile, cacheKey, -1, true);
   }
 
-  private void testCacheOnRead(HStoreFile hStoreFile, BlockCacheKey key, long onDiskBlockSize,
+  private void testCacheOnRead(HStoreFile hStoreFile, BlockCacheKey key, long onDiskBlockSize,
     boolean expectedCached) throws Exception {
     // Execute the read block API which will try to cache the block if the block is a hot block.
     hStoreFile.getReader().getHFileReader().readBlock(key.getOffset(), onDiskBlockSize, true,
       false, false, false, key.getBlockType(), DataBlockEncoding.NONE);
 
     // Validate that the hot block gets cached and cold block is not cached.
-    HFileBlock block = (HFileBlock) blockCache.getBlock(key, false, false, false, BlockType.DATA);
+    HFileBlock block = (HFileBlock) blockCache.getBlock(key, false, false, false);
     if (expectedCached) {
       assertNotNull(block);
     } else {
@@ -643,7 +633,8 @@
           numColdBlocks++;
         }
       } catch (Exception e) {
-        fail("Unexpected exception!");
+        LOG.debug("Error validating priority for key {}",key, e);
+        fail(e.getMessage());
       }
     }
     assertEquals(expectedHotBlocks, numHotBlocks);
@@ -721,7 +712,7 @@
 
     HRegion region1 = createHRegion("table1");
 
-    HStore hStore11 = createHStore(region1, "cf1", getConfWithTimeRangeDataTieringEnabled(day));
+    HStore hStore11 = createHStore(region1, "cf1", getConfWithCustomCellDataTieringEnabled(day));
     hStoreFiles.add(createHStoreFile(hStore11.getStoreContext().getFamilyStoreDirectoryPath(),
       hStore11.getReadOnlyConfiguration(), currentTime, region1.getRegionFileSystem()));
     hStore11.refreshStoreFiles();
@@ -734,15 +725,17 @@
     region1.stores.put(Bytes.toBytes("cf2"), hStore12);
 
     HRegion region2 =
-      createHRegion("table2", getConfWithTimeRangeDataTieringEnabled((long) (2.5 * day)));
+      createHRegion("table2", getConfWithCustomCellDataTieringEnabled((long) (2.5 * day)));
     HStore hStore21 = createHStore(region2, "cf1");
     hStoreFiles.add(createHStoreFile(hStore21.getStoreContext().getFamilyStoreDirectoryPath(),
-      hStore21.getReadOnlyConfiguration(), currentTime - 2 * day, region2.getRegionFileSystem()));
+      hStore21.getReadOnlyConfiguration(), currentTime - 2 * day,
+      region2.getRegionFileSystem()));
     hStore21.refreshStoreFiles();
 
     HStore hStore22 = createHStore(region2, "cf2");
     hStoreFiles.add(createHStoreFile(hStore22.getStoreContext().getFamilyStoreDirectoryPath(),
-      hStore22.getReadOnlyConfiguration(), currentTime - 3 * day, region2.getRegionFileSystem()));
+      hStore22.getReadOnlyConfiguration(), currentTime - 3 * day,
+      region2.getRegionFileSystem()));
     hStore22.refreshStoreFiles();
 
     region2.stores.put(Bytes.toBytes("cf1"), hStore21);
@@ -799,15 +792,9 @@
     return new HStore(region, columnFamilyDescriptor, conf, false);
   }
 
-  private static Configuration getConfWithTimeRangeDataTieringEnabled(long hotDataAge) {
-    Configuration conf = new Configuration(defaultConf);
-    conf.set(DataTieringManager.DATATIERING_KEY, DataTieringType.TIME_RANGE.name());
-    conf.set(DataTieringManager.DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(hotDataAge));
-    return conf;
-  }
 
   private static HStoreFile createHStoreFile(Path storeDir, Configuration conf, long timestamp,
-    HRegionFileSystem regionFs) throws IOException {
+    HRegionFileSystem regionFs) throws IOException {
     String columnFamily = storeDir.getName();
 
     StoreFileWriter storeFileWriter = new StoreFileWriter.Builder(conf, cacheConf, fs)
@@ -817,11 +804,17 @@
 
     StoreContext storeContext = StoreContext.getBuilder().withRegionFileSystem(regionFs).build();
-
     StoreFileTracker sft = StoreFileTrackerFactory.create(conf, true, storeContext);
     return new HStoreFile(fs, storeFileWriter.getPath(), conf, cacheConf, BloomType.NONE, true,
       sft);
   }
 
+  private static Configuration getConfWithCustomCellDataTieringEnabled(long hotDataAge) {
+    Configuration conf = new Configuration(defaultConf);
+    conf.set(DataTieringManager.DATATIERING_KEY, DataTieringType.CUSTOM_CELL_VALUE.name());
+    conf.set(DataTieringManager.DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(hotDataAge));
+    return conf;
+  }
+
   /**
    * Writes random data to a store file with rows arranged in lexicographically increasing order.
    * Each row is generated using the {@link #nextString()} method, ensuring that each subsequent row
@@ -839,6 +832,10 @@
       }
     } finally {
       writer.appendTrackedTimestampsToMetadata();
+      TimeRangeTracker timeRangeTracker = TimeRangeTracker.create(TimeRangeTracker.Type.NON_SYNC);
+      timeRangeTracker.setMin(timestamp);
+      timeRangeTracker.setMax(timestamp);
+      writer.appendCustomCellTimestampsToMetadata(timeRangeTracker);
       writer.close();
     }
   }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java
index 37e0fe98e7d..ebe027591ec 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java
@@ -61,6 +61,7 @@ import org.apache.hadoop.hbase.io.hfile.CacheConfig;
 import org.apache.hadoop.hbase.io.hfile.CacheTestUtils;
 import org.apache.hadoop.hbase.io.hfile.HFileBlock;
 import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
+import org.apache.hadoop.hbase.io.hfile.HFileWriterImpl;
 import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
 import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
 import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
@@ -227,7 +228,8 @@ public class TestDataTieringManager {
 
     // Test with a filename where corresponding HStoreFile in not present
     hFilePath = new Path(hStoreFiles.get(0).getPath().getParent(), "incorrectFileName");
-    testDataTieringMethodWithPathNoException(methodCallerWithPath, hFilePath, true);
+    testDataTieringMethodWithPathExpectingException(methodCallerWithPath, hFilePath,
+      new DataTieringException("Store file corresponding to " + hFilePath + " doesn't exist"));
   }
 
   @Test
@@ -600,19 +602,21 @@
 
   @Test
   public void testCacheOnReadColdFile() throws Exception {
+    initializeTestEnvironment();
     // hStoreFiles[3] is a cold file. the blocks should not get loaded after a readBlock call.
     HStoreFile hStoreFile = hStoreFiles.get(3);
     BlockCacheKey cacheKey = new BlockCacheKey(hStoreFile.getPath(), 0, true, BlockType.DATA);
-    testCacheOnRead(hStoreFile, cacheKey, 23025, false);
+    testCacheOnRead(hStoreFile, cacheKey, -1, false);
   }
 
   @Test
   public void testCacheOnReadHotFile() throws Exception {
+    initializeTestEnvironment();
     // hStoreFiles[0] is a hot file. the blocks should get loaded after a readBlock call.
     HStoreFile hStoreFile = hStoreFiles.get(0);
     BlockCacheKey cacheKey =
       new BlockCacheKey(hStoreFiles.get(0).getPath(), 0, true, BlockType.DATA);
-    testCacheOnRead(hStoreFile, cacheKey, 23025, true);
+    testCacheOnRead(hStoreFile, cacheKey, -1, true);
   }
 
   private void testCacheOnRead(HStoreFile hStoreFile, BlockCacheKey key, long onDiskBlockSize,
@@ -806,7 +810,7 @@
     return conf;
   }
 
-  private static HStoreFile createHStoreFile(Path storeDir, Configuration conf, long timestamp,
+  static HStoreFile createHStoreFile(Path storeDir, Configuration conf, long timestamp,
     HRegionFileSystem regionFs) throws IOException {
     String columnFamily = storeDir.getName();
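For readers skimming the patch, the heart of the refactor is visible in the DataTieringType change above: each enum constant now carries a DataTiering strategy, and call sites dispatch through dataTieringType.instance.getTimestamp(...) instead of branching on TIME_RANGE. Below is a minimal, self-contained sketch of that enum-bound strategy pattern. The DataTiering and DataTieringType names mirror the patch, but StoreFileStub, the lambda-based strategies, and the body of isHotData are simplified stand-ins for illustration, not the actual HBase types or logic.

```java
import java.util.concurrent.TimeUnit;

// Simplified stand-in for the pluggable priority logic introduced by this patch.
// A single-method interface so each tiering policy can be expressed as a lambda here.
interface DataTiering {
  long getTimestamp(StoreFileStub file); // max timestamp used to judge hot vs. cold
}

// Hypothetical stand-in for an HStoreFile carrying the two kinds of timestamp metadata.
class StoreFileStub {
  final long maxCellTimestamp;    // analogous to the TIMERANGE_KEY file-info entry
  final long customCellTimestamp; // analogous to the TIERING_CELL_TIME_RANGE entry

  StoreFileStub(long maxCellTimestamp, long customCellTimestamp) {
    this.maxCellTimestamp = maxCellTimestamp;
    this.customCellTimestamp = customCellTimestamp;
  }
}

// Mirrors the patch: each constant owns its strategy; NONE carries null and is always hot.
enum DataTieringType {
  NONE(null),
  TIME_RANGE(file -> file.maxCellTimestamp),
  CUSTOM_CELL_VALUE(file -> file.customCellTimestamp);

  final DataTiering instance;

  DataTieringType(DataTiering instance) {
    this.instance = instance;
  }
}

public class TieringSketch {
  // Roughly mirrors hotDataValidator(): data is hot if younger than hotDataAge.
  static boolean isHotData(DataTieringType type, StoreFileStub file, long hotDataAgeMillis) {
    if (type == DataTieringType.NONE) {
      return true; // no tiering configured: everything is considered hot by default
    }
    long age = System.currentTimeMillis() - type.instance.getTimestamp(file);
    return age <= hotDataAgeMillis;
  }

  public static void main(String[] args) {
    long now = System.currentTimeMillis();
    StoreFileStub recentFile =
      new StoreFileStub(now - TimeUnit.DAYS.toMillis(1), now - TimeUnit.DAYS.toMillis(1));
    StoreFileStub oldFile =
      new StoreFileStub(now - TimeUnit.DAYS.toMillis(10), now - TimeUnit.DAYS.toMillis(10));
    long fiveDays = TimeUnit.DAYS.toMillis(5);

    System.out.println(isHotData(DataTieringType.CUSTOM_CELL_VALUE, recentFile, fiveDays)); // true
    System.out.println(isHotData(DataTieringType.CUSTOM_CELL_VALUE, oldFile, fiveDays));    // false
    System.out.println(isHotData(DataTieringType.NONE, oldFile, fiveDays));                 // true
  }
}
```

Under this scheme, adding a new tiering policy means implementing DataTiering and registering it as one more enum constant; the tiering-aware paths in the patch (the Path and HFileInfo overloads of isHotData, and the cold-file scan) dispatch through dataTieringType.instance and need no further changes.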
