This is an automated email from the ASF dual-hosted git repository.
abhijain pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new 5da75cebec [GOBBLIN-2208] Acl preservation fix (#4117)
5da75cebec is described below
commit 5da75cebec5238656b3818a52d63ec68d630b714
Author: vsinghal85 <[email protected]>
AuthorDate: Wed May 21 15:01:32 2025 +0530
[GOBBLIN-2208] Acl preservation fix (#4117)
* Preserving ACL for directories
---
.../CreateDirectoryWithPermissionsCommitStep.java | 3 +-
.../java/org/apache/gobblin/util/HadoopUtils.java | 60 +++++--
.../org/apache/gobblin/util/HadoopUtilsTest.java | 175 ++++++++++++++++++++-
3 files changed, 226 insertions(+), 12 deletions(-)
diff --git
a/gobblin-data-management/src/main/java/org/apache/gobblin/util/commit/CreateDirectoryWithPermissionsCommitStep.java
b/gobblin-data-management/src/main/java/org/apache/gobblin/util/commit/CreateDirectoryWithPermissionsCommitStep.java
index 8540fdc8ca..26b3756cc3 100644
---
a/gobblin-data-management/src/main/java/org/apache/gobblin/util/commit/CreateDirectoryWithPermissionsCommitStep.java
+++
b/gobblin-data-management/src/main/java/org/apache/gobblin/util/commit/CreateDirectoryWithPermissionsCommitStep.java
@@ -75,7 +75,8 @@ public class CreateDirectoryWithPermissionsCommitStep
implements CommitStep {
try {
// Is a no-op if directory already exists, stops when it hits first
parent
// Sets the execute bit for USER in order to rename files to the
folder, so it should be reset after this step is completed
- HadoopUtils.ensureDirectoryExists(fs, path,
entry.getValue().listIterator(), throwOnError);
+ // Any inherited ACLs from parent are removed and directory is created
with only source ACLs
+ HadoopUtils.ensureDirectoryExists(fs, path,
entry.getValue().listIterator(), throwOnError, true);
} catch (IOException e) {
log.warn("Error while creating directory or setting owners/permission
on " + path, e);
if (this.throwOnError) {
diff --git
a/gobblin-utility/src/main/java/org/apache/gobblin/util/HadoopUtils.java
b/gobblin-utility/src/main/java/org/apache/gobblin/util/HadoopUtils.java
index 7d7017b154..f7070144c9 100644
--- a/gobblin-utility/src/main/java/org/apache/gobblin/util/HadoopUtils.java
+++ b/gobblin-utility/src/main/java/org/apache/gobblin/util/HadoopUtils.java
@@ -761,18 +761,51 @@ public class HadoopUtils {
}
}
+ /**
+ *
+ * This is a convenience method that delegates to {@link
#ensureDirectoryExists(FileSystem, Path, Iterator, boolean, boolean)}
+ * with copyOnlySourceAclToDest set to false, meaning it will preserve any
existing ACLs on the target directory.
+ *
+ * see {@link #ensureDirectoryExists(FileSystem, Path, Iterator, boolean,
boolean)} (...)}
+ */
+ public static void ensureDirectoryExists(FileSystem fs,
+ Path path,
+ Iterator<OwnerAndPermission> ownerAndPermissionIterator,
+ boolean failIfOwnerSetFails)
+ throws IOException {
+ ensureDirectoryExists(fs, path, ownerAndPermissionIterator,
failIfOwnerSetFails, false);
+ }
+
/**
* Creates a directory with the given path and enforces the given owner and
permissions recursively all the way up to root, or
* until the list of owner and permissions is exhausted.
- * If any of the parent directories already exists, it will not overwrite
the existing permissions and this function will be a no-op.
- * @param fs
- * @param path
- * @param ownerAndPermissionIterator
- * @param failIfOwnerSetFails
- * @throws IOException
+ *
+ * This method will:
+ * 1. Create the directory if it doesn't exist
+ * 2. Apply owner, group, and permission settings from the
OwnerAndPermission iterator
+ * 3. Apply ACL entries if present in the OwnerAndPermission
+ * 4. Recursively create and set permissions for parent directories if they
don't exist
+ *
+ * If any of the parent directories already exists, it will not overwrite
the existing permissions
+ * and this function will be a no-op for those directories.
+ *
+ * The method will add execute permission to the owner of each directory to
ensure proper access.
+ *
+ * @param fs The FileSystem instance to use for operations
+ * @param path The path of the directory to create
+ * @param ownerAndPermissionIterator Iterator containing OwnerAndPermission
objects for each level of the directory hierarchy.
+ * Each OwnerAndPermission contains:
+ * - owner: The owner of the directory
+ * - group: The group of the directory
+ * - permission: The FsPermission to set
+ * - aclEntries: List of ACL entries to
apply
+ * @param failIfOwnerSetFails If true, throws an IOException if setting
owner/group fails.
+ * If false, logs a warning and continues if
owner/group setting fails.
+ * @param copyOnlySourceAclToDest If true, removes any existing ACLs on the
target directory before applying new ones.
+ * If false, preserves existing ACLs and adds
new ones.
+ * @throws IOException If directory creation fails or if failIfOwnerSetFails
is true and setting owner/group fails
*/
-
- public static void ensureDirectoryExists(FileSystem fs, Path path,
Iterator<OwnerAndPermission> ownerAndPermissionIterator, boolean
failIfOwnerSetFails)
+ public static void ensureDirectoryExists(FileSystem fs, Path path,
Iterator<OwnerAndPermission> ownerAndPermissionIterator, boolean
failIfOwnerSetFails, boolean copyOnlySourceAclToDest)
throws IOException {
if (fs.exists(path)) {
@@ -783,14 +816,21 @@ public class HadoopUtils {
OwnerAndPermission ownerAndPermission =
ownerAndPermissionIterator.next();
if (path.getParent() != null) {
- ensureDirectoryExists(fs, path.getParent(),
ownerAndPermissionIterator, failIfOwnerSetFails);
+ ensureDirectoryExists(fs, path.getParent(),
ownerAndPermissionIterator, failIfOwnerSetFails, copyOnlySourceAclToDest);
}
if (!fs.mkdirs(path)) {
// fs.mkdirs returns false if path already existed. Do not overwrite
permissions
return;
}
-
+ try {
+ if (copyOnlySourceAclToDest) {
+ fs.removeAcl(path);
+ }
+ } catch(UnsupportedOperationException uoe) {
+ // ignore ACL calls through some unit tests, as it is not supported
for local FileSystem
+ log.info("removeACL operation is not supported for this file system",
uoe);
+ }
List<AclEntry> aclEntries = ownerAndPermission.getAclEntries();
if (!aclEntries.isEmpty()) {
// use modify acls instead of setAcl since latter requires all three
acl entry types: user, group and others
diff --git
a/gobblin-utility/src/test/java/org/apache/gobblin/util/HadoopUtilsTest.java
b/gobblin-utility/src/test/java/org/apache/gobblin/util/HadoopUtilsTest.java
index 7c6d51c1b3..31e1d4b8be 100644
--- a/gobblin-utility/src/test/java/org/apache/gobblin/util/HadoopUtilsTest.java
+++ b/gobblin-utility/src/test/java/org/apache/gobblin/util/HadoopUtilsTest.java
@@ -20,6 +20,8 @@ package org.apache.gobblin.util;
import java.io.IOException;
import java.net.URI;
import java.nio.file.AccessDeniedException;
+import java.util.Collections;
+import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -32,6 +34,10 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Trash;
import org.apache.hadoop.fs.TrashPolicy;
+import org.apache.hadoop.fs.permission.AclEntry;
+import org.apache.hadoop.fs.permission.AclEntryScope;
+import org.apache.hadoop.fs.permission.AclEntryType;
+import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
@@ -42,15 +48,20 @@ import org.testng.Assert;
import org.testng.annotations.Test;
import com.google.common.base.Optional;
+import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.Files;
import org.apache.gobblin.configuration.State;
+import org.apache.gobblin.util.filesystem.OwnerAndPermission;
@Test(groups = { "gobblin.util" })
public class HadoopUtilsTest {
+ public static final String TEST_DIR_PATH = "path/to/dir";
+ public static final String TEST_CHILD_DIR_NAME = "HadoopUtilsTestDir";
+
@Test
public void fsShortSerializationTest() {
State state = new State();
@@ -75,7 +86,7 @@ public class HadoopUtilsTest {
@Test
public void testRenameRecursively() throws Exception {
- final Path hadoopUtilsTestDir = new
Path(Files.createTempDir().getAbsolutePath(), "HadoopUtilsTestDir");
+ final Path hadoopUtilsTestDir = new
Path(Files.createTempDir().getAbsolutePath(), TEST_CHILD_DIR_NAME);
FileSystem fs = FileSystem.getLocal(new Configuration());
try {
fs.mkdirs(hadoopUtilsTestDir);
@@ -333,4 +344,166 @@ public class HadoopUtilsTest {
Assert.assertFalse(fs.exists(hadoopUtilsTestDir));
Assert.assertTrue(fs.exists(trashPath));
}
+
+ @Test
+ public void testEnsureDirectoryExistsWithAclPreservation() throws Exception {
+ final Path testDir = new Path(new Path(TEST_DIR_PATH),
"HadoopUtilsTestDir");
+ FileSystem fs = Mockito.mock(FileSystem.class);
+ Path targetDir = new Path(testDir, "target");
+
+ Mockito.when(fs.exists(targetDir)).thenReturn(false);
+ Mockito.when(fs.exists(targetDir.getParent())).thenReturn(true);
+
+ // Create ACL entries
+ List<AclEntry> aclEntries = Lists.newArrayList(
+ new AclEntry.Builder()
+ .setName("user1")
+ .setType(AclEntryType.USER)
+ .setScope(AclEntryScope.ACCESS)
+ .setPermission(FsAction.ALL)
+ .build(),
+ new AclEntry.Builder()
+ .setName("group1")
+ .setType(AclEntryType.GROUP)
+ .setScope(AclEntryScope.ACCESS)
+ .setPermission(FsAction.READ_EXECUTE)
+ .build()
+ );
+
+ // Create OwnerAndPermission with the ACLs
+ OwnerAndPermission ownerAndPermission =
getOwnerAndPermissionForAclEntries(aclEntries);
+
+ // Mock mkdirs to return true
+ Mockito.when(fs.mkdirs(targetDir)).thenReturn(true);
+ // Call ensureDirectoryExists with copyOnlySourceAclToDest=true
+ HadoopUtils.ensureDirectoryExists(fs, targetDir,
+ Collections.singletonList(ownerAndPermission).listIterator(),
+ true, true);
+ // Verify mkdirs was called
+ Mockito.verify(fs).mkdirs(targetDir);
+ Mockito.verify(fs).removeAcl(targetDir);
+ // Verify modifyAclEntries was called with correct ACLs
+ Mockito.verify(fs).modifyAclEntries(targetDir, aclEntries);
+ }
+
+ @Test
+ public void testEnsureDirectoryExistsWithExistingDirectory() throws
Exception {
+ final Path testDir = new Path(new Path(TEST_DIR_PATH),
TEST_CHILD_DIR_NAME);
+ FileSystem fs = Mockito.mock(FileSystem.class);
+ // Create target directory path
+ Path targetDir = new Path(testDir, "target");
+
+ Mockito.when(fs.exists(targetDir)).thenReturn(true);
+ Mockito.when(fs.exists(targetDir.getParent())).thenReturn(true);
+
+ // Create new ACLs to set
+ List<AclEntry> aclEntries = Lists.newArrayList(
+ new AclEntry.Builder()
+ .setName("user2")
+ .setType(AclEntryType.USER)
+ .setScope(AclEntryScope.ACCESS)
+ .setPermission(FsAction.ALL)
+ .build()
+ );
+
+ OwnerAndPermission ownerAndPermission =
getOwnerAndPermissionForAclEntries(aclEntries);
+
+ // Call ensureDirectoryExists - should be a no-op since directory exists
+ HadoopUtils.ensureDirectoryExists(fs, targetDir,
+ Collections.singletonList(ownerAndPermission).listIterator(),
+ true, true);
+
+ // Verify mkdirs was not called
+ Mockito.verify(fs, Mockito.never()).mkdirs(targetDir);
+ // Verify removeAcl was not called
+ Mockito.verify(fs, Mockito.never()).removeAcl(targetDir);
+ // Verify setAcl was not called
+ Mockito.verify(fs,
Mockito.never()).modifyAclEntries(Mockito.any(Path.class), Mockito.anyList());
+
+ }
+
+ @Test
+ public void testEnsureDirectoryExistsWithAcl() throws Exception {
+ final Path testDir = new Path(new Path(TEST_DIR_PATH),
TEST_CHILD_DIR_NAME);
+ FileSystem fs = Mockito.mock(FileSystem.class);
+
+ // Create target directory path
+ Path targetDir = new Path(testDir, "target");
+
+ Mockito.when(fs.exists(targetDir)).thenReturn(false);
+ Mockito.when(fs.exists(targetDir.getParent())).thenReturn(true);
+
+ // Create ACL entries
+ List<AclEntry> aclEntries = Lists.newArrayList(
+ new AclEntry.Builder()
+ .setName("user1")
+ .setType(AclEntryType.USER)
+ .setScope(AclEntryScope.ACCESS)
+ .setPermission(FsAction.ALL)
+ .build(),
+ new AclEntry.Builder()
+ .setName("group1")
+ .setType(AclEntryType.GROUP)
+ .setScope(AclEntryScope.ACCESS)
+ .setPermission(FsAction.READ_EXECUTE)
+ .build()
+ );
+
+ // Create OwnerAndPermission with the ACLs
+ OwnerAndPermission ownerAndPermission =
getOwnerAndPermissionForAclEntries(aclEntries);
+
+ // Mock mkdirs to return true
+ Mockito.when(fs.mkdirs(targetDir)).thenReturn(true);
+
+ // Call ensureDirectoryExists
+ HadoopUtils.ensureDirectoryExists(fs, targetDir,
+ Collections.singletonList(ownerAndPermission).listIterator(),
+ true);
+
+ // Verify mkdirs was called
+ Mockito.verify(fs).mkdirs(targetDir);
+ // Verify removeAcl was never called
+ Mockito.verify(fs, Mockito.never()).removeAcl(Mockito.any(Path.class));
+ // Verify modifyAclEntries was called with correct ACLs
+ Mockito.verify(fs).modifyAclEntries(targetDir, aclEntries);
+ }
+
+ @Test
+ public void testEnsureDirectoryExistsWithEmptyAcl() throws Exception {
+ final Path testDir = new Path(new Path(TEST_DIR_PATH),
TEST_CHILD_DIR_NAME);
+ FileSystem fs = Mockito.mock(FileSystem.class);
+
+ // Create target directory path
+ Path targetDir = new Path(testDir, "target");
+
+ Mockito.when(fs.exists(targetDir)).thenReturn(false);
+ Mockito.when(fs.exists(targetDir.getParent())).thenReturn(true);
+
+ // Create OwnerAndPermission with empty ACLs
+ OwnerAndPermission ownerAndPermission =
getOwnerAndPermissionForAclEntries(Collections.emptyList());
+
+ // Mock mkdirs to return true
+ Mockito.when(fs.mkdirs(targetDir)).thenReturn(true);
+
+ // Call ensureDirectoryExists
+ HadoopUtils.ensureDirectoryExists(fs, targetDir,
+ Collections.singletonList(ownerAndPermission).listIterator(),
+ true);
+
+ // Verify mkdirs was called
+ Mockito.verify(fs).mkdirs(targetDir);
+ // Verify removeAcl was never called
+ Mockito.verify(fs, Mockito.never()).removeAcl(Mockito.any(Path.class));
+ // Verify modifyAclEntries was not called since ACLs are empty
+ Mockito.verify(fs,
Mockito.never()).modifyAclEntries(Mockito.any(Path.class), Mockito.anyList());
+ }
+
+ private OwnerAndPermission getOwnerAndPermissionForAclEntries(List<AclEntry>
aclEntries) {
+ return new OwnerAndPermission(
+ "owner",
+ "group",
+ new FsPermission("755"),
+ aclEntries
+ );
+ }
}