This is an automated email from the ASF dual-hosted git repository.

abhijain pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new 5da75cebec [GOBBLIN-2208] Acl preservation fix (#4117)
5da75cebec is described below

commit 5da75cebec5238656b3818a52d63ec68d630b714
Author: vsinghal85 <[email protected]>
AuthorDate: Wed May 21 15:01:32 2025 +0530

    [GOBBLIN-2208] Acl preservation fix (#4117)
    
    * Preserving ACL for directories
---
 .../CreateDirectoryWithPermissionsCommitStep.java  |   3 +-
 .../java/org/apache/gobblin/util/HadoopUtils.java  |  60 +++++--
 .../org/apache/gobblin/util/HadoopUtilsTest.java   | 175 ++++++++++++++++++++-
 3 files changed, 226 insertions(+), 12 deletions(-)

diff --git 
a/gobblin-data-management/src/main/java/org/apache/gobblin/util/commit/CreateDirectoryWithPermissionsCommitStep.java
 
b/gobblin-data-management/src/main/java/org/apache/gobblin/util/commit/CreateDirectoryWithPermissionsCommitStep.java
index 8540fdc8ca..26b3756cc3 100644
--- 
a/gobblin-data-management/src/main/java/org/apache/gobblin/util/commit/CreateDirectoryWithPermissionsCommitStep.java
+++ 
b/gobblin-data-management/src/main/java/org/apache/gobblin/util/commit/CreateDirectoryWithPermissionsCommitStep.java
@@ -75,7 +75,8 @@ public class CreateDirectoryWithPermissionsCommitStep 
implements CommitStep {
       try {
         // Is a no-op if directory already exists, stops when it hits first 
parent
         // Sets the execute bit for USER in order to rename files to the 
folder, so it should be reset after this step is completed
-        HadoopUtils.ensureDirectoryExists(fs, path, 
entry.getValue().listIterator(), throwOnError);
+        // Any inherited ACLs from parent are removed and directory is created 
with only source ACLs
+        HadoopUtils.ensureDirectoryExists(fs, path, 
entry.getValue().listIterator(), throwOnError, true);
       } catch (IOException e) {
         log.warn("Error while creating directory or setting owners/permission 
on " + path, e);
         if (this.throwOnError) {
diff --git 
a/gobblin-utility/src/main/java/org/apache/gobblin/util/HadoopUtils.java 
b/gobblin-utility/src/main/java/org/apache/gobblin/util/HadoopUtils.java
index 7d7017b154..f7070144c9 100644
--- a/gobblin-utility/src/main/java/org/apache/gobblin/util/HadoopUtils.java
+++ b/gobblin-utility/src/main/java/org/apache/gobblin/util/HadoopUtils.java
@@ -761,18 +761,51 @@ public class HadoopUtils {
     }
   }
 
+  /**
+   *
+   * This is a convenience method that delegates to {@link 
#ensureDirectoryExists(FileSystem, Path, Iterator, boolean, boolean)}
+   * with copyOnlySourceAclToDest set to false, meaning it will preserve any 
existing ACLs on the target directory.
+   *
+   * see {@link #ensureDirectoryExists(FileSystem, Path, Iterator, boolean, 
boolean)} (...)}
+   */
+  public static void ensureDirectoryExists(FileSystem fs,
+      Path path,
+      Iterator<OwnerAndPermission> ownerAndPermissionIterator,
+      boolean failIfOwnerSetFails)
+      throws IOException {
+    ensureDirectoryExists(fs, path, ownerAndPermissionIterator, 
failIfOwnerSetFails, false);
+  }
+
   /**
    * Creates a directory with the given path and enforces the given owner and 
permissions recursively all the way up to root, or
    * until the list of owner and permissions is exhausted.
-   * If any of the parent directories already exists, it will not overwrite 
the existing permissions and this function will be a no-op.
-   * @param fs
-   * @param path
-   * @param ownerAndPermissionIterator
-   * @param failIfOwnerSetFails
-   * @throws IOException
+   *
+   * This method will:
+   * 1. Create the directory if it doesn't exist
+   * 2. Apply owner, group, and permission settings from the 
OwnerAndPermission iterator
+   * 3. Apply ACL entries if present in the OwnerAndPermission
+   * 4. Recursively create and set permissions for parent directories if they 
don't exist
+   *
+   * If any of the parent directories already exists, it will not overwrite 
the existing permissions
+   * and this function will be a no-op for those directories.
+   *
+   * The method will add execute permission to the owner of each directory to 
ensure proper access.
+   *
+   * @param fs The FileSystem instance to use for operations
+   * @param path The path of the directory to create
+   * @param ownerAndPermissionIterator Iterator containing OwnerAndPermission 
objects for each level of the directory hierarchy.
+   *                                  Each OwnerAndPermission contains:
+   *                                  - owner: The owner of the directory
+   *                                  - group: The group of the directory
+   *                                  - permission: The FsPermission to set
+   *                                  - aclEntries: List of ACL entries to 
apply
+   * @param failIfOwnerSetFails If true, throws an IOException if setting 
owner/group fails.
+   *                           If false, logs a warning and continues if 
owner/group setting fails.
+   * @param copyOnlySourceAclToDest If true, removes any existing ACLs on the 
target directory before applying new ones.
+   *                               If false, preserves existing ACLs and adds 
new ones.
+   * @throws IOException If directory creation fails or if failIfOwnerSetFails 
is true and setting owner/group fails
    */
-
-  public static void ensureDirectoryExists(FileSystem fs, Path path, 
Iterator<OwnerAndPermission> ownerAndPermissionIterator, boolean 
failIfOwnerSetFails)
+  public static void ensureDirectoryExists(FileSystem fs, Path path, 
Iterator<OwnerAndPermission> ownerAndPermissionIterator, boolean 
failIfOwnerSetFails, boolean copyOnlySourceAclToDest)
       throws IOException {
 
     if (fs.exists(path)) {
@@ -783,14 +816,21 @@ public class HadoopUtils {
       OwnerAndPermission ownerAndPermission = 
ownerAndPermissionIterator.next();
 
       if (path.getParent() != null) {
-        ensureDirectoryExists(fs, path.getParent(), 
ownerAndPermissionIterator, failIfOwnerSetFails);
+        ensureDirectoryExists(fs, path.getParent(), 
ownerAndPermissionIterator, failIfOwnerSetFails, copyOnlySourceAclToDest);
       }
 
       if (!fs.mkdirs(path)) {
         // fs.mkdirs returns false if path already existed. Do not overwrite 
permissions
         return;
       }
-
+      try {
+        if (copyOnlySourceAclToDest) {
+          fs.removeAcl(path);
+        }
+      } catch(UnsupportedOperationException uoe) {
+        // ignore ACL calls through some unit tests, as it is not supported 
for local FileSystem
+        log.info("removeACL operation is not supported for this file system", 
uoe);
+      }
       List<AclEntry> aclEntries = ownerAndPermission.getAclEntries();
       if (!aclEntries.isEmpty()) {
         // use modify acls instead of setAcl since latter requires all three 
acl entry types: user, group and others
diff --git 
a/gobblin-utility/src/test/java/org/apache/gobblin/util/HadoopUtilsTest.java 
b/gobblin-utility/src/test/java/org/apache/gobblin/util/HadoopUtilsTest.java
index 7c6d51c1b3..31e1d4b8be 100644
--- a/gobblin-utility/src/test/java/org/apache/gobblin/util/HadoopUtilsTest.java
+++ b/gobblin-utility/src/test/java/org/apache/gobblin/util/HadoopUtilsTest.java
@@ -20,6 +20,8 @@ package org.apache.gobblin.util;
 import java.io.IOException;
 import java.net.URI;
 import java.nio.file.AccessDeniedException;
+import java.util.Collections;
+import java.util.List;
 import java.util.Map;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
@@ -32,6 +34,10 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Trash;
 import org.apache.hadoop.fs.TrashPolicy;
+import org.apache.hadoop.fs.permission.AclEntry;
+import org.apache.hadoop.fs.permission.AclEntryScope;
+import org.apache.hadoop.fs.permission.AclEntryType;
+import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.mockito.Mockito;
 import org.mockito.invocation.InvocationOnMock;
@@ -42,15 +48,20 @@ import org.testng.Assert;
 import org.testng.annotations.Test;
 
 import com.google.common.base.Optional;
+import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.io.Files;
 
 import org.apache.gobblin.configuration.State;
+import org.apache.gobblin.util.filesystem.OwnerAndPermission;
 
 
 @Test(groups = { "gobblin.util" })
 public class HadoopUtilsTest {
 
+  public static final String TEST_DIR_PATH = "path/to/dir";
+  public static final String TEST_CHILD_DIR_NAME = "HadoopUtilsTestDir";
+
   @Test
   public void fsShortSerializationTest() {
     State state = new State();
@@ -75,7 +86,7 @@ public class HadoopUtilsTest {
   @Test
   public void testRenameRecursively() throws Exception {
 
-    final Path hadoopUtilsTestDir = new 
Path(Files.createTempDir().getAbsolutePath(), "HadoopUtilsTestDir");
+    final Path hadoopUtilsTestDir = new 
Path(Files.createTempDir().getAbsolutePath(), TEST_CHILD_DIR_NAME);
     FileSystem fs = FileSystem.getLocal(new Configuration());
     try {
       fs.mkdirs(hadoopUtilsTestDir);
@@ -333,4 +344,166 @@ public class HadoopUtilsTest {
     Assert.assertFalse(fs.exists(hadoopUtilsTestDir));
     Assert.assertTrue(fs.exists(trashPath));
   }
+
+  @Test
+  public void testEnsureDirectoryExistsWithAclPreservation() throws Exception {
+    final Path testDir = new Path(new Path(TEST_DIR_PATH), 
"HadoopUtilsTestDir");
+    FileSystem fs = Mockito.mock(FileSystem.class);
+    Path targetDir = new Path(testDir, "target");
+
+    Mockito.when(fs.exists(targetDir)).thenReturn(false);
+    Mockito.when(fs.exists(targetDir.getParent())).thenReturn(true);
+
+    // Create ACL entries
+    List<AclEntry> aclEntries = Lists.newArrayList(
+        new AclEntry.Builder()
+            .setName("user1")
+            .setType(AclEntryType.USER)
+            .setScope(AclEntryScope.ACCESS)
+            .setPermission(FsAction.ALL)
+            .build(),
+        new AclEntry.Builder()
+            .setName("group1")
+            .setType(AclEntryType.GROUP)
+            .setScope(AclEntryScope.ACCESS)
+            .setPermission(FsAction.READ_EXECUTE)
+            .build()
+    );
+
+    // Create OwnerAndPermission with the ACLs
+    OwnerAndPermission ownerAndPermission = 
getOwnerAndPermissionForAclEntries(aclEntries);
+
+    // Mock mkdirs to return true
+    Mockito.when(fs.mkdirs(targetDir)).thenReturn(true);
+    // Call ensureDirectoryExists with copyOnlySourceAclToDest=true
+    HadoopUtils.ensureDirectoryExists(fs, targetDir,
+        Collections.singletonList(ownerAndPermission).listIterator(),
+        true, true);
+    // Verify mkdirs was called
+    Mockito.verify(fs).mkdirs(targetDir);
+    Mockito.verify(fs).removeAcl(targetDir);
+    // Verify modifyAclEntries was called with correct ACLs
+    Mockito.verify(fs).modifyAclEntries(targetDir, aclEntries);
+  }
+
+  @Test
+  public void testEnsureDirectoryExistsWithExistingDirectory() throws 
Exception {
+    final Path testDir = new Path(new Path(TEST_DIR_PATH), 
TEST_CHILD_DIR_NAME);
+    FileSystem fs = Mockito.mock(FileSystem.class);
+    // Create target directory path
+    Path targetDir = new Path(testDir, "target");
+
+    Mockito.when(fs.exists(targetDir)).thenReturn(true);
+    Mockito.when(fs.exists(targetDir.getParent())).thenReturn(true);
+
+    // Create new ACLs to set
+    List<AclEntry> aclEntries = Lists.newArrayList(
+        new AclEntry.Builder()
+            .setName("user2")
+            .setType(AclEntryType.USER)
+            .setScope(AclEntryScope.ACCESS)
+            .setPermission(FsAction.ALL)
+            .build()
+    );
+
+    OwnerAndPermission ownerAndPermission = 
getOwnerAndPermissionForAclEntries(aclEntries);
+
+    // Call ensureDirectoryExists - should be a no-op since directory exists
+    HadoopUtils.ensureDirectoryExists(fs, targetDir,
+        Collections.singletonList(ownerAndPermission).listIterator(),
+        true, true);
+
+    // Verify mkdirs was not called
+    Mockito.verify(fs, Mockito.never()).mkdirs(targetDir);
+    // Verify removeAcl was not called
+    Mockito.verify(fs, Mockito.never()).removeAcl(targetDir);
+    // Verify setAcl was not called
+    Mockito.verify(fs, 
Mockito.never()).modifyAclEntries(Mockito.any(Path.class), Mockito.anyList());
+
+  }
+
+  @Test
+  public void testEnsureDirectoryExistsWithAcl() throws Exception {
+    final Path testDir = new Path(new Path(TEST_DIR_PATH), 
TEST_CHILD_DIR_NAME);
+    FileSystem fs = Mockito.mock(FileSystem.class);
+
+    // Create target directory path
+    Path targetDir = new Path(testDir, "target");
+
+    Mockito.when(fs.exists(targetDir)).thenReturn(false);
+    Mockito.when(fs.exists(targetDir.getParent())).thenReturn(true);
+
+    // Create ACL entries
+    List<AclEntry> aclEntries = Lists.newArrayList(
+        new AclEntry.Builder()
+            .setName("user1")
+            .setType(AclEntryType.USER)
+            .setScope(AclEntryScope.ACCESS)
+            .setPermission(FsAction.ALL)
+            .build(),
+        new AclEntry.Builder()
+            .setName("group1")
+            .setType(AclEntryType.GROUP)
+            .setScope(AclEntryScope.ACCESS)
+            .setPermission(FsAction.READ_EXECUTE)
+            .build()
+    );
+
+    // Create OwnerAndPermission with the ACLs
+    OwnerAndPermission ownerAndPermission = 
getOwnerAndPermissionForAclEntries(aclEntries);
+
+    // Mock mkdirs to return true
+    Mockito.when(fs.mkdirs(targetDir)).thenReturn(true);
+
+    // Call ensureDirectoryExists
+    HadoopUtils.ensureDirectoryExists(fs, targetDir,
+        Collections.singletonList(ownerAndPermission).listIterator(),
+        true);
+
+    // Verify mkdirs was called
+    Mockito.verify(fs).mkdirs(targetDir);
+    // Verify removeAcl was never called
+    Mockito.verify(fs, Mockito.never()).removeAcl(Mockito.any(Path.class));
+    // Verify modifyAclEntries was called with correct ACLs
+    Mockito.verify(fs).modifyAclEntries(targetDir, aclEntries);
+  }
+
+  @Test
+  public void testEnsureDirectoryExistsWithEmptyAcl() throws Exception {
+    final Path testDir = new Path(new Path(TEST_DIR_PATH), 
TEST_CHILD_DIR_NAME);
+    FileSystem fs = Mockito.mock(FileSystem.class);
+
+    // Create target directory path
+    Path targetDir = new Path(testDir, "target");
+
+    Mockito.when(fs.exists(targetDir)).thenReturn(false);
+    Mockito.when(fs.exists(targetDir.getParent())).thenReturn(true);
+
+    // Create OwnerAndPermission with empty ACLs
+    OwnerAndPermission ownerAndPermission = 
getOwnerAndPermissionForAclEntries(Collections.emptyList());
+
+    // Mock mkdirs to return true
+    Mockito.when(fs.mkdirs(targetDir)).thenReturn(true);
+
+    // Call ensureDirectoryExists
+    HadoopUtils.ensureDirectoryExists(fs, targetDir,
+        Collections.singletonList(ownerAndPermission).listIterator(),
+        true);
+
+    // Verify mkdirs was called
+    Mockito.verify(fs).mkdirs(targetDir);
+    // Verify removeAcl was never called
+    Mockito.verify(fs, Mockito.never()).removeAcl(Mockito.any(Path.class));
+    // Verify modifyAclEntries was not called since ACLs are empty
+    Mockito.verify(fs, 
Mockito.never()).modifyAclEntries(Mockito.any(Path.class), Mockito.anyList());
+  }
+
+  private OwnerAndPermission getOwnerAndPermissionForAclEntries(List<AclEntry> 
aclEntries) {
+    return new OwnerAndPermission(
+        "owner",
+        "group",
+        new FsPermission("755"),
+        aclEntries
+    );
+  }
 }

Reply via email to