This is an automated email from the ASF dual-hosted git repository.

abhijain pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new 1c875ce798 Skip Permission Check for files existing in target (#4162)
1c875ce798 is described below

commit 1c875ce7982851779abb711136a421df1f1bec6c
Author: Vyom Aggarwal <[email protected]>
AuthorDate: Thu Mar 5 23:13:38 2026 -0800

    Skip Permission Check for files existing in target (#4162)
    
    - Added skipPermissionCheck flag to skip I/O operations for files that already exist in the target location
      - When enabled, skips expensive file status checks and permission resolutions for existing files
      - Improves performance for discovery by avoiding unnecessary I/O operations
      - Disabled by default to maintain backward compatibility
---
 .../apache/gobblin/data/management/copy/ManifestBasedDataset.java | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
index 531f506bf8..cab65f1275 100644
--- 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
+++ 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
@@ -69,8 +69,10 @@ public class ManifestBasedDataset implements 
IterableCopyableDataset {
 
   // Enable setting permission post publish to reset permission bits, default 
is true
   private static final String ENABLE_SET_PERMISSION_POST_PUBLISH = 
ManifestBasedDatasetFinder.CONFIG_PREFIX + ".enableSetPermissionPostPublish";
+  public static final String SKIP_PERMISSION_CHECK = 
ManifestBasedDatasetFinder.CONFIG_PREFIX + ".skipPermissionCheck";
   private static final String DEFAULT_PERMISSION_CACHE_TTL_SECONDS = "30";
   private static final String DEFAULT_COMMON_FILES_PARENT = "/";
+  private static final boolean DEFAULT_SKIP_PERMISSION_CHECK = false;
   private final FileSystem srcFs;
   private final FileSystem manifestReadFs;
   private final Path manifestPath;
@@ -80,6 +82,7 @@ public class ManifestBasedDataset implements 
IterableCopyableDataset {
   private final int permissionCacheTTLSeconds;
 
   private final boolean enableSetPermissionPostPublish;
+  private final boolean skipPermissionCheck;
 
   public ManifestBasedDataset(final FileSystem srcFs, final FileSystem 
manifestReadFs, final Path manifestPath, final Properties properties) {
     this.srcFs = srcFs;
@@ -90,6 +93,7 @@ public class ManifestBasedDataset implements 
IterableCopyableDataset {
     this.commonFilesParent = properties.getProperty(COMMON_FILES_PARENT, 
DEFAULT_COMMON_FILES_PARENT);
     this.permissionCacheTTLSeconds = 
Integer.parseInt(properties.getProperty(PERMISSION_CACHE_TTL_SECONDS, 
DEFAULT_PERMISSION_CACHE_TTL_SECONDS));
     this.enableSetPermissionPostPublish = 
Boolean.parseBoolean(properties.getProperty(ENABLE_SET_PERMISSION_POST_PUBLISH, 
"true"));
+    this.skipPermissionCheck = 
Boolean.parseBoolean(properties.getProperty(SKIP_PERMISSION_CHECK, 
String.valueOf(DEFAULT_SKIP_PERMISSION_CHECK)));
   }
 
   @Override
@@ -132,6 +136,10 @@ public class ManifestBasedDataset implements 
IterableCopyableDataset {
         Path fileToCopy = new Path(file.fileName);
         if (srcFs.exists(fileToCopy)) {
           boolean existOnTarget = targetFs.exists(fileToCopy);
+          if (this.skipPermissionCheck && existOnTarget) {
+            // Skip Permission Check for files that already exist in the 
target when skipPermissionCheck is true
+            continue;
+          }
           FileStatus srcFile = srcFs.getFileStatus(fileToCopy);
           OwnerAndPermission replicatedPermission = 
CopyableFile.resolveReplicatedOwnerAndPermission(srcFs, srcFile, configuration);
           if (!existOnTarget || shouldCopy(targetFs, srcFile, 
targetFs.getFileStatus(fileToCopy), replicatedPermission)) {

Reply via email to