This is an automated email from the ASF dual-hosted git repository.

wlo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new e79375a55 [GOBBLIN-2043] Set execute bit only for new folders in 
manifest distcp (#3922)
e79375a55 is described below

commit e79375a555e96b6df2a79b7bb1a5b7fa309d1538
Author: William Lo <[email protected]>
AuthorDate: Wed Apr 17 15:48:53 2024 -0400

    [GOBBLIN-2043] Set execute bit only for new folders in manifest distcp 
(#3922)
    
    Set execute bit only for new folders in manifest distcp
---
 .../data/management/copy/ManifestBasedDataset.java  | 21 +++++++++++++++++----
 .../util/commit/SetPermissionCommitStep.java        |  2 ++
 .../dataset/ManifestBasedDatasetFinderTest.java     |  3 +++
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
index b81d59663..c6823d39b 100644
--- 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
+++ 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
@@ -26,10 +26,12 @@ import com.google.gson.JsonIOException;
 import com.google.gson.JsonSyntaxException;
 import java.io.IOException;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
+import java.util.Set;
 import java.util.TreeMap;
 import java.util.concurrent.TimeUnit;
 import lombok.extern.slf4j.Slf4j;
@@ -37,6 +39,7 @@ import org.apache.gobblin.commit.CommitStep;
 import org.apache.gobblin.data.management.copy.entities.PostPublishStep;
 import org.apache.gobblin.data.management.copy.entities.PrePublishStep;
 import org.apache.gobblin.data.management.partition.FileSet;
+import org.apache.gobblin.util.PathUtils;
 import org.apache.gobblin.util.commit.DeleteFileCommitStep;
 import org.apache.gobblin.util.commit.SetPermissionCommitStep;
 
@@ -130,10 +133,12 @@ public class ManifestBasedDataset implements 
IterableCopyableDataset {
             copyEntities.add(copyableFile);
 
             Path fromPath = srcFs.getFileStatus(fileToCopy).isDirectory() ? 
fileToCopy : fileToCopy.getParent();
-
-            ancestorOwnerAndPermissions.putAll(
-                
CopyableFile.resolveReplicatedAncestorOwnerAndPermissionsRecursively(srcFs, 
fromPath,
-                    new Path(commonFilesParent), configuration));
+            // Avoid duplicate calculation for the same ancestor
+            if 
(!ancestorOwnerAndPermissions.containsKey(PathUtils.getPathWithoutSchemeAndAuthority(fromPath).toString()))
 {
+              ancestorOwnerAndPermissions.putAll(
+                  
CopyableFile.resolveReplicatedAncestorOwnerAndPermissionsRecursively(srcFs, 
fromPath,
+                      new Path(commonFilesParent), configuration));
+            }
 
             if (existOnTarget && srcFile.isFile()) {
               // this is to match the existing publishing behavior where we 
won't rewrite the target when it's already existed
@@ -146,6 +151,14 @@ public class ManifestBasedDataset implements 
IterableCopyableDataset {
         }
       }
 
+      // Only set permission for newly created folders on target
+      // To change permissions for existing dirs, expectation is to add the 
folder to the manifest
+      Set<String> parentFolders = new 
HashSet<>(ancestorOwnerAndPermissions.keySet());
+      for (String folder : parentFolders) {
+        if (targetFs.exists(new Path(folder))) {
+          ancestorOwnerAndPermissions.remove(folder);
+        }
+      }
       Properties props = new Properties();
       props.setProperty(SetPermissionCommitStep.STOP_ON_ERROR_KEY, "true");
       CommitStep setPermissionCommitStep = new 
SetPermissionCommitStep(targetFs, ancestorOwnerAndPermissions, props);
diff --git 
a/gobblin-data-management/src/main/java/org/apache/gobblin/util/commit/SetPermissionCommitStep.java
 
b/gobblin-data-management/src/main/java/org/apache/gobblin/util/commit/SetPermissionCommitStep.java
index dd1a68bb4..0887c05bf 100644
--- 
a/gobblin-data-management/src/main/java/org/apache/gobblin/util/commit/SetPermissionCommitStep.java
+++ 
b/gobblin-data-management/src/main/java/org/apache/gobblin/util/commit/SetPermissionCommitStep.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.security.AccessControlException;
 
+import lombok.Getter;
 import lombok.extern.slf4j.Slf4j;
 
 import org.apache.gobblin.commit.CommitStep;
@@ -38,6 +39,7 @@ import 
org.apache.gobblin.data.management.copy.OwnerAndPermission;
  */
 @Slf4j
 public class SetPermissionCommitStep implements CommitStep {
+  @Getter
   Map<String, OwnerAndPermission> pathAndPermissions;
   private final URI fsUri;
   public final boolean stopOnError;
diff --git 
a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
 
b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
index a0dbcacad..1adbe9ea3 100644
--- 
a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
+++ 
b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
@@ -93,6 +93,8 @@ public class ManifestBasedDatasetFinderTest {
       FileSet<CopyEntity> fileSet = fileSets.next();
       Assert.assertEquals(fileSet.getFiles().size(), 3);  // 2 files to copy + 
1 post publish step
       Assert.assertTrue(((PostPublishStep) 
fileSet.getFiles().get(2)).getStep() instanceof SetPermissionCommitStep);
+      SetPermissionCommitStep step = (SetPermissionCommitStep) 
((PostPublishStep) fileSet.getFiles().get(2)).getStep();
+      Assert.assertEquals(step.getPathAndPermissions().keySet().size(), 2);
       Mockito.verify(manifestReadFs, Mockito.times(1)).exists(manifestPath);
       Mockito.verify(manifestReadFs, 
Mockito.times(1)).getFileStatus(manifestPath);
       Mockito.verify(manifestReadFs, Mockito.times(1)).open(manifestPath);
@@ -178,6 +180,7 @@ public class ManifestBasedDatasetFinderTest {
     Mockito.when(sourceFs.getUri()).thenReturn(SRC_FS_URI);
     Mockito.when(manifestReadFs.getUri()).thenReturn(MANIFEST_READ_FS_URI);
     Mockito.when(destFs.getUri()).thenReturn(DEST_FS_URI);
+    Mockito.when(destFs.exists(new Path("/tmp"))).thenReturn(true);
     
Mockito.when(sourceFs.getFileStatus(any(Path.class))).thenReturn(localFs.getFileStatus(new
 Path(tmpDir.toString())));
     Mockito.when(sourceFs.exists(any(Path.class))).thenReturn(true);
     Mockito.when(manifestReadFs.exists(any(Path.class))).thenReturn(true);

Reply via email to