This is an automated email from the ASF dual-hosted git repository.

abhijain pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new 7c54b95fd3 Fix owner execute permission bit issue for path existing in 
destination (#4147)
7c54b95fd3 is described below

commit 7c54b95fd34d57249a1fb351b504713ed43ccce1
Author: abhishekmjain <[email protected]>
AuthorDate: Tue Oct 28 16:20:21 2025 +0530

    Fix owner execute permission bit issue for path existing in destination 
(#4147)
---
 .../data/management/copy/ManifestBasedDataset.java |  22 ++-
 .../dataset/ManifestBasedDatasetFinderTest.java    | 157 ++++++++++++++++++++-
 .../manifestBasedDistcpTest/nestedDirectories.json |  20 +++
 3 files changed, 192 insertions(+), 7 deletions(-)

diff --git 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
index 8f9a78b7ce..531f506bf8 100644
--- 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
+++ 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
@@ -116,6 +116,7 @@ public class ManifestBasedDataset implements 
IterableCopyableDataset {
     // map of paths and permissions sorted by depth of path, so that 
permissions can be set in order
     Map<String, List<OwnerAndPermission>> ancestorOwnerAndPermissions = new 
HashMap<>();
     Map<String, List<OwnerAndPermission>> 
ancestorOwnerAndPermissionsForSetPermissionStep = new HashMap<>();
+    Map<String, OwnerAndPermission> 
existingDirectoryPermissionsForSetPermissionStep = new HashMap<>();
     TreeMap<String, OwnerAndPermission> flattenedAncestorPermissions = new 
TreeMap<>(
         Comparator.comparingInt((String o) -> 
o.split("/").length).thenComparing(o -> o));
     try {
@@ -151,13 +152,18 @@ public class ManifestBasedDataset implements 
IterableCopyableDataset {
             // FileAwareInputStreamDataWriter::setOwnerExecuteBitIfDirectory 
-> HadoopUtils::addExecutePermissionToOwner
             // We need to revert this extra permission change in 
setPermissionStep
             if (srcFile.isDirectory() && 
!srcFile.getPermission().getUserAction().implies(FsAction.EXECUTE)
-                && 
!ancestorOwnerAndPermissionsForSetPermissionStep.containsKey(PathUtils.getPathWithoutSchemeAndAuthority(fileToCopy).toString())
-                && !targetFs.exists(fileToCopy)) {
-              List<OwnerAndPermission> ancestorsOwnerAndPermissionUpdated = 
new ArrayList<>();
+                && 
!ancestorOwnerAndPermissionsForSetPermissionStep.containsKey(PathUtils.getPathWithoutSchemeAndAuthority(fileToCopy).toString()))
 {
               OwnerAndPermission srcFileOwnerPermissionReplicatedForDest = new 
OwnerAndPermission(copyableFile.getDestinationOwnerAndPermission());
-              
ancestorsOwnerAndPermissionUpdated.add(srcFileOwnerPermissionReplicatedForDest);
-              
copyableFile.getAncestorsOwnerAndPermission().forEach(ancestorOwnPerm -> 
ancestorsOwnerAndPermissionUpdated.add(new 
OwnerAndPermission(ancestorOwnPerm)));
-              
ancestorOwnerAndPermissionsForSetPermissionStep.put(fileToCopy.toString(), 
ancestorsOwnerAndPermissionUpdated);
+              if(!targetFs.exists(fileToCopy)) {
+                List<OwnerAndPermission> ancestorsOwnerAndPermissionUpdated = 
new ArrayList<>();
+                
ancestorsOwnerAndPermissionUpdated.add(srcFileOwnerPermissionReplicatedForDest);
+                
copyableFile.getAncestorsOwnerAndPermission().forEach(ancestorOwnPerm -> 
ancestorsOwnerAndPermissionUpdated.add(new 
OwnerAndPermission(ancestorOwnPerm)));
+                
ancestorOwnerAndPermissionsForSetPermissionStep.put(fileToCopy.toString(), 
ancestorsOwnerAndPermissionUpdated);
+              }
+              else {
+                // If the path exists, update only current directory 
permission in post publish step and not entire hierarchy.
+                
existingDirectoryPermissionsForSetPermissionStep.put(fileToCopy.toString(), 
srcFileOwnerPermissionReplicatedForDest);
+              }
             }
 
             // Always grab the parent since the above permission setting 
should be setting the permission for a folder itself
@@ -198,6 +204,10 @@ public class ManifestBasedDataset implements 
IterableCopyableDataset {
             currentPath = currentPath.getParent();
           }
         }
+        for (Map.Entry<String, OwnerAndPermission> 
existingDirectoryPermissions : 
existingDirectoryPermissionsForSetPermissionStep.entrySet()) {
+          Path currentPath = new Path(existingDirectoryPermissions.getKey());
+          flattenedAncestorPermissions.put(currentPath.toString(), 
existingDirectoryPermissions.getValue());
+        }
         CommitStep setPermissionCommitStep = new 
SetPermissionCommitStep(targetFs, flattenedAncestorPermissions, 
this.properties);
         copyEntities.add(new PostPublishStep(datasetURN(), Maps.newHashMap(), 
setPermissionCommitStep, 1));
       }
diff --git 
a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
 
b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
index 6acc668073..2b62ccad8f 100644
--- 
a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
+++ 
b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
@@ -159,7 +159,7 @@ public class ManifestBasedDatasetFinderTest {
       CommitStep setPermissionStep = ((PostPublishStep) 
fileSet.getFiles().get(3)).getStep();
       Assert.assertTrue(setPermissionStep instanceof SetPermissionCommitStep);
       Map<String, OwnerAndPermission> ownerAndPermissionMap = 
((SetPermissionCommitStep) setPermissionStep).getPathAndPermissions();
-      // Ignore /tmp as it already exists on destination
+      // Ignore /tmp as it is not part of manifest and already present in 
destination
       Assert.assertEquals(ownerAndPermissionMap.size(), 1);
       Assert.assertTrue(ownerAndPermissionMap.containsKey("/tmp/dataset"));
     }
@@ -415,6 +415,161 @@ public class ManifestBasedDatasetFinderTest {
     }
   }
 
+  /**
+   * Test correct permissions are set for existing directories.
+   */
+  @Test
+  public void testCorrectPermissionForExistingDirectoriesWithoutExecute() 
throws IOException, URISyntaxException {
+    Path manifestPath = new 
Path(getClass().getClassLoader().getResource("manifestBasedDistcpTest/sampleManifestWithOnlyDirectory.json").getPath());
+    Properties props = new Properties();
+    props.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/");
+    props.setProperty("gobblin.copy.preserved.attributes", "rbugpvta");
+
+    try (FileSystem sourceFs = Mockito.mock(FileSystem.class);
+        FileSystem manifestReadFs = Mockito.mock(FileSystem.class);
+        FileSystem destFs = Mockito.mock(FileSystem.class)) {
+
+      setSourceAndDestFsMocks(sourceFs, destFs, manifestPath, manifestReadFs, 
false);
+
+      // Key difference: directory already exists on target
+      Mockito.when(destFs.exists(new Path("/tmp/dataset"))).thenReturn(true);
+      Mockito.when(destFs.exists(new Path("/tmp"))).thenReturn(true);
+
+      
Mockito.when(destFs.getFileStatus(any(Path.class))).thenReturn(localFs.getFileStatus(new
 Path(tmpDir.toString())));
+
+      // Source directory has 000 permission (no execute bit)
+      setFsMockPathWithPermissions(sourceFs, "/tmp/dataset", "d---------", 
"owner1", "group1", true);
+      setFsMockPathWithPermissions(sourceFs, "/tmp", "dr--r--r--", "owner2", 
"group2", true);
+
+      Iterator<FileSet<CopyEntity>> fileSets = new 
ManifestBasedDataset(sourceFs, manifestReadFs, manifestPath, props)
+          .getFileSetIterator(destFs, CopyConfiguration.builder(destFs, 
props).build());
+
+      Assert.assertTrue(fileSets.hasNext());
+      FileSet<CopyEntity> fileSet = fileSets.next();
+      Assert.assertEquals(fileSet.getFiles().size(), 3); // 1 dir to copy + 1 
pre publish step + 1 post publish step
+
+      // Verify SetPermissionCommitStep includes the directory even though it 
exists on target
+      CommitStep setPermissionStep = ((PostPublishStep) 
fileSet.getFiles().get(2)).getStep();
+      Assert.assertTrue(setPermissionStep instanceof SetPermissionCommitStep);
+      Map<String, OwnerAndPermission> ownerAndPermissionMap = 
((SetPermissionCommitStep) setPermissionStep).getPathAndPermissions();
+
+      // Should include only /tmp/dataset and not /tmp since it is not part of 
manifest for permission correction
+      Assert.assertEquals(ownerAndPermissionMap.size(), 1);
+      Assert.assertTrue(ownerAndPermissionMap.containsKey("/tmp/dataset"));
+      
Assert.assertEquals(ownerAndPermissionMap.get("/tmp/dataset").getFsPermission(),
 FsPermission.valueOf("d---------"));
+      
Assert.assertEquals(ownerAndPermissionMap.get("/tmp/dataset").getOwner(), 
"owner1");
+      
Assert.assertEquals(ownerAndPermissionMap.get("/tmp/dataset").getGroup(), 
"group1");
+
+      Assert.assertFalse(ownerAndPermissionMap.containsKey("/tmp"));
+    }
+  }
+
+  /**
+   * Test correct permissions are set in post-publish step.
+   */
+  @Test
+  public void testCorrectPermissionForMixedExistingAndNewDirectories() throws 
IOException, URISyntaxException {
+    Path manifestPath = new 
Path(getClass().getClassLoader().getResource("manifestBasedDistcpTest/nestedDirectories.json").getPath());
+    Properties props = new Properties();
+    props.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/");
+    props.setProperty("gobblin.copy.preserved.attributes", "rbugpvta");
+
+    try (FileSystem sourceFs = Mockito.mock(FileSystem.class);
+        FileSystem manifestReadFs = Mockito.mock(FileSystem.class);
+        FileSystem destFs = Mockito.mock(FileSystem.class)) {
+
+      setSourceAndDestFsMocks(sourceFs, destFs, manifestPath, manifestReadFs, 
false);
+
+      // Mix of existing and non-existing directories
+      Mockito.when(destFs.exists(new Path("/tmp"))).thenReturn(true);
+      Mockito.when(destFs.exists(new Path("/tmp/dataset"))).thenReturn(true);
+      Mockito.when(destFs.exists(new 
Path("/tmp/dataset/hourly"))).thenReturn(false);
+      Mockito.when(destFs.exists(new 
Path("/tmp/dataset/hourly/metadata"))).thenReturn(false);
+      Mockito.when(destFs.exists(new Path("/tmp/dataset2"))).thenReturn(true);
+      Mockito.when(destFs.exists(new 
Path("/tmp/dataset2/hourly"))).thenReturn(true);
+      Mockito.when(destFs.exists(new 
Path("/tmp/dataset2/hourly/metadata"))).thenReturn(true);
+      Mockito.when(destFs.exists(new 
Path("/tmp/dataset2/hourly/metadata2"))).thenReturn(true);
+
+      
Mockito.when(destFs.getFileStatus(any(Path.class))).thenReturn(localFs.getFileStatus(new
 Path(tmpDir.toString())));
+
+      // Set up directories with various permissions, some without execute bit
+      setFsMockPathWithPermissions(sourceFs, "/tmp/dataset/hourly/metadata", 
"drw-rw-rw-", "owner1", "group1", true);
+      setFsMockPathWithPermissions(sourceFs, "/tmp/dataset2/hourly/metadata", 
"dr-xr--r--", "owner2", "group2", true);
+      setFsMockPathWithPermissions(sourceFs, "/tmp/dataset2/hourly/metadata2", 
"dr--r--r--", "owner2", "group2", true);
+      setFsMockPathWithPermissions(sourceFs, "/tmp/dataset/hourly", 
"drw-rw-rw-", "owner1", "group1", true);
+      setFsMockPathWithPermissions(sourceFs, "/tmp/dataset2/hourly", 
"dr--r--r--", "owner2", "group2", true);
+      setFsMockPathWithPermissions(sourceFs, "/tmp/dataset", "drw-r-----", 
"owner1", "group1", true);
+      setFsMockPathWithPermissions(sourceFs, "/tmp/dataset2", "dr--r--r--", 
"owner2", "group2", true);
+      setFsMockPathWithPermissions(sourceFs, "/tmp", "dr--r--r--", "owner3", 
"group3", true);
+
+      AclStatus aclStatusDest = 
buildAclStatusWithPermissions("user::r--,group::---,other::---", "group3", 
"owner3");
+      
Mockito.when(destFs.getAclStatus(any(Path.class))).thenReturn(aclStatusDest);
+
+      Iterator<FileSet<CopyEntity>> fileSets = new 
ManifestBasedDataset(sourceFs, manifestReadFs, manifestPath, props)
+          .getFileSetIterator(destFs, CopyConfiguration.builder(destFs, 
props).build());
+
+      Assert.assertTrue(fileSets.hasNext());
+      FileSet<CopyEntity> fileSet = fileSets.next();
+
+      CommitStep setPermissionStep = ((PostPublishStep) 
fileSet.getFiles().get(4)).getStep();
+      Assert.assertTrue(setPermissionStep instanceof SetPermissionCommitStep);
+      Map<String, OwnerAndPermission> ownerAndPermissionMap = 
((SetPermissionCommitStep) setPermissionStep).getPathAndPermissions();
+
+      // Not part of manifest and already exists in destination
+      Assert.assertFalse(ownerAndPermissionMap.containsKey("/tmp/dataset"));
+      Assert.assertFalse(ownerAndPermissionMap.containsKey("/tmp/dataset2"));
+
+      
Assert.assertFalse(ownerAndPermissionMap.containsKey("/tmp/dataset2/hourly/metadata"));
 // Owner execute is set
+
+      // Path not present in target
+      
Assert.assertTrue(ownerAndPermissionMap.containsKey("/tmp/dataset/hourly/metadata"));
+      
Assert.assertEquals(ownerAndPermissionMap.get("/tmp/dataset/hourly/metadata").getFsPermission(),
 FsPermission.valueOf("drw-rw-rw-"));
+      
Assert.assertTrue(ownerAndPermissionMap.containsKey("/tmp/dataset2/hourly/metadata2"));
+      
Assert.assertEquals(ownerAndPermissionMap.get("/tmp/dataset2/hourly/metadata2").getFsPermission(),
 FsPermission.valueOf("dr--r--r--"));
+    }
+  }
+
+  /**
+   * Test that directories with execute permission are NOT added to 
SetPermissionCommitStep
+   * when they already exist on target.
+   */
+  @Test
+  public void testNoPermissionFixForExistingDirectoriesWithExecute() throws 
IOException, URISyntaxException {
+    Path manifestPath = new 
Path(getClass().getClassLoader().getResource("manifestBasedDistcpTest/sampleManifestWithOnlyDirectory.json").getPath());
+    Properties props = new Properties();
+    props.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/");
+    props.setProperty("gobblin.copy.preserved.attributes", "rbugpvta");
+
+    try (FileSystem sourceFs = Mockito.mock(FileSystem.class);
+        FileSystem manifestReadFs = Mockito.mock(FileSystem.class);
+        FileSystem destFs = Mockito.mock(FileSystem.class)) {
+
+      setSourceAndDestFsMocks(sourceFs, destFs, manifestPath, manifestReadFs, 
false);
+
+      // Directory already exists on target
+      Mockito.when(destFs.exists(new Path("/tmp/dataset"))).thenReturn(true);
+      Mockito.when(destFs.exists(new Path("/tmp"))).thenReturn(true);
+
+      
Mockito.when(destFs.getFileStatus(any(Path.class))).thenReturn(localFs.getFileStatus(new
 Path(tmpDir.toString())));
+
+      // Source directory has execute permission (should NOT need permission 
fix)
+      setFsMockPathWithPermissions(sourceFs, "/tmp/dataset", "drwxrwxrwx", 
"owner1", "group1", true);
+      setFsMockPathWithPermissions(sourceFs, "/tmp", "dr-xr-xr-x", "owner2", 
"group2", true);
+
+      Iterator<FileSet<CopyEntity>> fileSets = new 
ManifestBasedDataset(sourceFs, manifestReadFs, manifestPath, props)
+          .getFileSetIterator(destFs, CopyConfiguration.builder(destFs, 
props).build());
+
+      Assert.assertTrue(fileSets.hasNext());
+      FileSet<CopyEntity> fileSet = fileSets.next();
+
+      CommitStep setPermissionStep = ((PostPublishStep) 
fileSet.getFiles().get(2)).getStep();
+      Assert.assertTrue(setPermissionStep instanceof SetPermissionCommitStep);
+      Map<String, OwnerAndPermission> ownerAndPermissionMap = 
((SetPermissionCommitStep) setPermissionStep).getPathAndPermissions();
+
+      Assert.assertEquals(ownerAndPermissionMap.size(), 0);
+    }
+  }
+
   private void setSourceAndDestFsMocks(FileSystem sourceFs, FileSystem destFs, 
Path manifestPath, FileSystem manifestReadFs, boolean setFileStatusMock) throws 
IOException, URISyntaxException {
     URI SRC_FS_URI = new URI("source", "the.source.org", "/", null);
     URI MANIFEST_READ_FS_URI = new URI("manifest-read", 
"the.manifest-source.org", "/", null);
diff --git 
a/gobblin-data-management/src/test/resources/manifestBasedDistcpTest/nestedDirectories.json
 
b/gobblin-data-management/src/test/resources/manifestBasedDistcpTest/nestedDirectories.json
new file mode 100644
index 0000000000..beb9f5572b
--- /dev/null
+++ 
b/gobblin-data-management/src/test/resources/manifestBasedDistcpTest/nestedDirectories.json
@@ -0,0 +1,20 @@
+[
+  {
+    "id":"1",
+    "fileName":"/tmp/dataset/hourly/metadata",
+    "fileGroup":"/tmp/dataset",
+    "fileSizeInBytes":"1024"
+  },
+  {
+    "id":"2",
+    "fileName":"/tmp/dataset2/hourly/metadata",
+    "fileGroup":"/tmp/dataset2",
+    "fileSizeInBytes":"1028"
+  },
+  {
+    "id":"3",
+    "fileName":"/tmp/dataset2/hourly/metadata2",
+    "fileGroup":"/tmp/dataset2",
+    "fileSizeInBytes":"1028"
+  }
+]
\ No newline at end of file

Reply via email to