linliu-code commented on code in PR #13427:
URL: https://github.com/apache/hudi/pull/13427#discussion_r2150761891
##########
hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java:
##########
@@ -101,34 +110,72 @@ public class HoodieAvroHFileReader extends
HoodieAvroFileReaderBase implements H
private final Object sharedLock = new Object();
- public HoodieAvroHFileReader(Configuration hadoopConf, Path path,
CacheConfig cacheConfig) throws IOException {
- this(path, FSUtils.getFs(path.toString(), hadoopConf), hadoopConf,
cacheConfig, Option.empty());
+ public HoodieAvroHFileReader(Configuration hadoopConf, Path path) throws
IOException {
+ this(path, FSUtils.getFs(path.toString(), hadoopConf), hadoopConf,
Option.empty());
}
- public HoodieAvroHFileReader(Configuration hadoopConf, Path path,
CacheConfig cacheConfig, FileSystem fs, Option<Schema> schemaOpt) throws
IOException {
- this(path, fs, hadoopConf, cacheConfig, schemaOpt);
+ public HoodieAvroHFileReader(Configuration hadoopConf, Path path, FileSystem
fs, Option<Schema> schemaOpt) throws IOException {
+ this(path, fs, hadoopConf, schemaOpt);
}
- public HoodieAvroHFileReader(Configuration hadoopConf, Path path,
CacheConfig cacheConfig, FileSystem fs, byte[] content, Option<Schema>
schemaOpt) throws IOException {
- this(path, fs, hadoopConf, cacheConfig, schemaOpt, Option.of(content));
+ public HoodieAvroHFileReader(Configuration hadoopConf, Path path, FileSystem
fs, byte[] content, Option<Schema> schemaOpt) throws IOException {
+ this(path, fs, hadoopConf, schemaOpt, Option.of(content));
}
- public HoodieAvroHFileReader(Path path, FileSystem fs, Configuration
hadoopConf, CacheConfig config, Option<Schema> schemaOpt) throws IOException {
- this(path, fs, hadoopConf, config, schemaOpt, Option.empty());
+ public HoodieAvroHFileReader(Path path, FileSystem fs, Configuration
hadoopConf, Option<Schema> schemaOpt) throws IOException {
+ this(path, fs, hadoopConf, schemaOpt, Option.empty());
}
- public HoodieAvroHFileReader(Path path, FileSystem fs, Configuration
hadoopConf, CacheConfig config, Option<Schema> schemaOpt, Option<byte[]>
content) throws IOException {
+ public HoodieAvroHFileReader(Path path, FileSystem fs, Configuration
hadoopConf, Option<Schema> schemaOpt, Option<byte[]> content) throws
IOException {
this.path = path;
this.fs = fs;
this.hadoopConf = hadoopConf;
- this.config = config;
this.content = content;
// Shared reader is instantiated lazily.
this.sharedReader = Option.empty();
this.sharedScanner = Option.empty();
this.schema = schemaOpt.map(Lazy::eagerly)
.orElseGet(() -> Lazy.lazily(() ->
fetchSchema(getSharedHFileReader())));
+
+ synchronized (HoodieAvroHFileReader.class) {
+ // HBase 2.4+ does not allocate a block cache automatically within the
CacheConfig but requires a BlockCache
+ // instance to be passed in. This is different from HBase 1.x where
CacheConfig allocated and used a static global
+ // BlockCache instance.
+ //
https://github.com/apache/hbase/blob/branch-1.4/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java#L688
+
+      // BlockCache speeds up lookup from the HFile. To emulate the behavior
of HBase 1.x, we will allocate a
+      static global block cache here if it is enabled. The configs for the
BlockCache are described in the link
+ // below and can be passed using the hadoop configuration.
+ //
https://github.com/apache/hbase/blob/master/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheFactory.java
+ if (GLOBAL_HFILE_BLOCK_CACHE == null) {
+ GLOBAL_HFILE_BLOCK_CACHE =
BlockCacheFactory.createBlockCache(hadoopConf);
+ if (GLOBAL_HFILE_BLOCK_CACHE != null) {
+ LOG.info("Allocated a new global block cache for hfile readers " +
GLOBAL_HFILE_BLOCK_CACHE);
+ }
+ }
+
+ // The BlockCache keys are based on the name of the files being cached.
Within HUDI, there is a non-zero chance that
+ // two different files can have the same name. Also, when multiple
datasets are being accessed in the same JVM, there
Review Comment:
1. When do two files have the same name?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]