This is an automated email from the ASF dual-hosted git repository.
abhijain pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new 1c875ce798 Skip Permission Check for files existing in target (#4162)
1c875ce798 is described below
commit 1c875ce7982851779abb711136a421df1f1bec6c
Author: Vyom Aggarwal <[email protected]>
AuthorDate: Thu Mar 5 23:13:38 2026 -0800
Skip Permission Check for files existing in target (#4162)
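- Added a skipPermissionCheck flag (config key:
ManifestBasedDatasetFinder.CONFIG_PREFIX + ".skipPermissionCheck") to skip
I/O operations for files that already exist in the target location
- When enabled, skips the expensive file status checks and permission
resolutions for existing files; such files are passed over before the
shouldCopy comparison, so they are not re-evaluated for copying
- Improves discovery performance by avoiding unnecessary I/O
operations
- Disabled by default to maintain backward compatibility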
---
.../apache/gobblin/data/management/copy/ManifestBasedDataset.java | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
index 531f506bf8..cab65f1275 100644
---
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
+++
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
@@ -69,8 +69,10 @@ public class ManifestBasedDataset implements
IterableCopyableDataset {
// Enable setting permission post publish to reset permission bits, default
is true
private static final String ENABLE_SET_PERMISSION_POST_PUBLISH =
ManifestBasedDatasetFinder.CONFIG_PREFIX + ".enableSetPermissionPostPublish";
+ public static final String SKIP_PERMISSION_CHECK =
ManifestBasedDatasetFinder.CONFIG_PREFIX + ".skipPermissionCheck";
private static final String DEFAULT_PERMISSION_CACHE_TTL_SECONDS = "30";
private static final String DEFAULT_COMMON_FILES_PARENT = "/";
+ private static final boolean DEFAULT_SKIP_PERMISSION_CHECK = false;
private final FileSystem srcFs;
private final FileSystem manifestReadFs;
private final Path manifestPath;
@@ -80,6 +82,7 @@ public class ManifestBasedDataset implements
IterableCopyableDataset {
private final int permissionCacheTTLSeconds;
private final boolean enableSetPermissionPostPublish;
+ private final boolean skipPermissionCheck;
public ManifestBasedDataset(final FileSystem srcFs, final FileSystem
manifestReadFs, final Path manifestPath, final Properties properties) {
this.srcFs = srcFs;
@@ -90,6 +93,7 @@ public class ManifestBasedDataset implements
IterableCopyableDataset {
this.commonFilesParent = properties.getProperty(COMMON_FILES_PARENT,
DEFAULT_COMMON_FILES_PARENT);
this.permissionCacheTTLSeconds =
Integer.parseInt(properties.getProperty(PERMISSION_CACHE_TTL_SECONDS,
DEFAULT_PERMISSION_CACHE_TTL_SECONDS));
this.enableSetPermissionPostPublish =
Boolean.parseBoolean(properties.getProperty(ENABLE_SET_PERMISSION_POST_PUBLISH,
"true"));
+ this.skipPermissionCheck =
Boolean.parseBoolean(properties.getProperty(SKIP_PERMISSION_CHECK,
String.valueOf(DEFAULT_SKIP_PERMISSION_CHECK)));
}
@Override
@@ -132,6 +136,10 @@ public class ManifestBasedDataset implements
IterableCopyableDataset {
Path fileToCopy = new Path(file.fileName);
if (srcFs.exists(fileToCopy)) {
boolean existOnTarget = targetFs.exists(fileToCopy);
+ if (this.skipPermissionCheck && existOnTarget) {
+ // Skip Permission Check for files that already exist in the
target when skipPermissionCheck is true
+ continue;
+ }
FileStatus srcFile = srcFs.getFileStatus(fileToCopy);
OwnerAndPermission replicatedPermission =
CopyableFile.resolveReplicatedOwnerAndPermission(srcFs, srcFile, configuration);
if (!existOnTarget || shouldCopy(targetFs, srcFile,
targetFs.getFileStatus(fileToCopy), replicatedPermission)) {