This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 1e285dc  [HUDI-2489] Tuning HoodieROTablePathFilter by caching hoodieTableFileSystemView, aiming to reduce unnecessary list/get requests (#3719)
1e285dc is described below

commit 1e285dc3999c56a3302b5ed9e5c38a6bcb884f92
Author: zhangyue19921010 <[email protected]>
AuthorDate: Sat Oct 23 00:03:58 2021 +0800

    [HUDI-2489] Tuning HoodieROTablePathFilter by caching hoodieTableFileSystemView, aiming to reduce unnecessary list/get requests (#3719)
    
    Co-authored-by: yuezhang <[email protected]>
---
 .../apache/hudi/hadoop/HoodieROTablePathFilter.java | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git 
a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
 
b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
index d94018b..c797f59 100644
--- 
a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
+++ 
b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java
@@ -79,6 +79,11 @@ public class HoodieROTablePathFilter implements 
Configurable, PathFilter, Serial
   Map<String, HoodieTableMetaClient> metaClientCache;
 
   /**
+   * HoodieTableFileSystemView Cache.
+   */
+  private Map<String, HoodieTableFileSystemView> 
hoodieTableFileSystemViewCache;
+
+  /**
    * Hadoop configurations for the FileSystem.
    */
   private SerializableConfiguration conf;
@@ -97,6 +102,7 @@ public class HoodieROTablePathFilter implements 
Configurable, PathFilter, Serial
     this.nonHoodiePathCache = new HashSet<>();
     this.conf = new SerializableConfiguration(conf);
     this.metaClientCache = new HashMap<>();
+    this.hoodieTableFileSystemViewCache = new HashMap<>();
   }
 
   /**
@@ -175,8 +181,15 @@ public class HoodieROTablePathFilter implements 
Configurable, PathFilter, Serial
             metaClientCache.put(baseDir.toString(), metaClient);
           }
 
-          fsView = 
FileSystemViewManager.createInMemoryFileSystemView(engineContext,
-              metaClient, 
HoodieInputFormatUtils.buildMetadataConfig(getConf()));
+          HoodieTableMetaClient finalMetaClient = metaClient;
+          fsView = 
hoodieTableFileSystemViewCache.computeIfAbsent(baseDir.toString(), key ->
+                  FileSystemViewManager.createInMemoryFileSystemView(
+                          engineContext,
+                          finalMetaClient,
+                          HoodieInputFormatUtils.buildMetadataConfig(getConf())
+                  )
+          );
+
           String partition = FSUtils.getRelativePartitionPath(new 
Path(metaClient.getBasePath()), folder);
           List<HoodieBaseFile> latestFiles = 
fsView.getLatestBaseFiles(partition).collect(Collectors.toList());
           // populate the cache
@@ -202,10 +215,6 @@ public class HoodieROTablePathFilter implements 
Configurable, PathFilter, Serial
           }
           nonHoodiePathCache.add(folder.toString());
           return true;
-        } finally {
-          if (fsView != null) {
-            fsView.close();
-          }
         }
       } else {
         // files is at < 3 level depth in FS tree, can't be hoodie dataset

Reply via email to