This is an automated email from the ASF dual-hosted git repository.
sivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 1e285dc [HUDI-2489]Tuning HoodieROTablePathFilter by caching
hoodieTableFileSystemView, aiming to reduce unnecessary list/get requests
(#3719)
1e285dc is described below
commit 1e285dc3999c56a3302b5ed9e5c38a6bcb884f92
Author: zhangyue19921010 <[email protected]>
AuthorDate: Sat Oct 23 00:03:58 2021 +0800
[HUDI-2489]Tuning HoodieROTablePathFilter by caching
hoodieTableFileSystemView, aiming to reduce unnecessary list/get requests
(#3719)
Co-authored-by: yuezhang <[email protected]>
---
.../apache/hudi/hadoop/HoodieROTablePathFilter.java | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
index d94018b..c797f59 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
@@ -79,6 +79,11 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial
Map<String, HoodieTableMetaClient> metaClientCache;
/**
+ * HoodieTableFileSystemView Cache.
+ */
+ private Map<String, HoodieTableFileSystemView> hoodieTableFileSystemViewCache;
+
+ /**
* Hadoop configurations for the FileSystem.
*/
private SerializableConfiguration conf;
@@ -97,6 +102,7 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial
this.nonHoodiePathCache = new HashSet<>();
this.conf = new SerializableConfiguration(conf);
this.metaClientCache = new HashMap<>();
+ this.hoodieTableFileSystemViewCache = new HashMap<>();
}
/**
@@ -175,8 +181,15 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial
metaClientCache.put(baseDir.toString(), metaClient);
}
- fsView = FileSystemViewManager.createInMemoryFileSystemView(engineContext,
- metaClient, HoodieInputFormatUtils.buildMetadataConfig(getConf()));
+ HoodieTableMetaClient finalMetaClient = metaClient;
+ fsView = hoodieTableFileSystemViewCache.computeIfAbsent(baseDir.toString(), key ->
+ FileSystemViewManager.createInMemoryFileSystemView(
+ engineContext,
+ finalMetaClient,
+ HoodieInputFormatUtils.buildMetadataConfig(getConf())
+ )
+ );
+
String partition = FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), folder);
List<HoodieBaseFile> latestFiles = fsView.getLatestBaseFiles(partition).collect(Collectors.toList());
// populate the cache
@@ -202,10 +215,6 @@ public class HoodieROTablePathFilter implements Configurable, PathFilter, Serial
}
nonHoodiePathCache.add(folder.toString());
return true;
- } finally {
- if (fsView != null) {
- fsView.close();
- }
}
} else {
// files is at < 3 level depth in FS tree, can't be hoodie dataset