This is an automated email from the ASF dual-hosted git repository.

vivekrai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new 8d71c89cc1 [GOBBLIN-2206] Revert extra execute bit getting set in 
ManifestDistcp (#4115)
8d71c89cc1 is described below

commit 8d71c89cc16a1b8d2d4bf7f19182fca32a3608c4
Author: Vivek Rai <43493515+blazer-...@users.noreply.github.com>
AuthorDate: Mon May 5 13:11:35 2025 +0530

    [GOBBLIN-2206] Revert extra execute bit getting set in ManifestDistcp 
(#4115)
    
    * added check to add extra set permission step
    
    * add copy constructor for ownerandpermission
    
    * remove extra null check for checkstyle failure
---
 .../data/management/copy/ManifestBasedDataset.java | 22 ++++++-
 .../dataset/ManifestBasedDatasetFinderTest.java    | 72 ++++++++++++++++++++++
 .../sampleManifestWithOnlyDirectory.json           |  8 +++
 .../util/filesystem/OwnerAndPermission.java        | 16 +++++
 .../util/filesystem/OwnerAndPermissionTest.java    | 60 ++++++++++++++++++
 5 files changed, 177 insertions(+), 1 deletion(-)

diff --git 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
index 6d7df69043..8f9a78b7ce 100644
--- 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
+++ 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
@@ -18,6 +18,7 @@
 package org.apache.gobblin.data.management.copy;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
@@ -31,6 +32,7 @@ import java.util.concurrent.TimeUnit;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsAction;
 
 import com.google.common.base.Optional;
 import com.google.common.cache.Cache;
@@ -113,6 +115,7 @@ public class ManifestBasedDataset implements 
IterableCopyableDataset {
     List<FileStatus> toDelete = Lists.newArrayList();
     // map of paths and permissions sorted by depth of path, so that 
permissions can be set in order
     Map<String, List<OwnerAndPermission>> ancestorOwnerAndPermissions = new 
HashMap<>();
+    Map<String, List<OwnerAndPermission>> 
ancestorOwnerAndPermissionsForSetPermissionStep = new HashMap<>();
     TreeMap<String, OwnerAndPermission> flattenedAncestorPermissions = new 
TreeMap<>(
         Comparator.comparingInt((String o) -> 
o.split("/").length).thenComparing(o -> o));
     try {
@@ -143,12 +146,29 @@ public class ManifestBasedDataset implements 
IterableCopyableDataset {
             copyableFile.setFsDatasets(srcFs, targetFs);
             copyEntities.add(copyableFile);
 
+            // In case of directory with 000 permission, the permission is 
changed to 100 due to HadoopUtils::addExecutePermissionToOwner
+            // getting called from 
CopyDataPublisher::preserveFileAttrInPublisher -> 
FileAwareInputStreamDataWriter::setPathPermission ->
+            // FileAwareInputStreamDataWriter::setOwnerExecuteBitIfDirectory 
-> HadoopUtils::addExecutePermissionToOwner
+            // We need to revert this extra permission change in 
setPermissionStep
+            if (srcFile.isDirectory() && 
!srcFile.getPermission().getUserAction().implies(FsAction.EXECUTE)
+                && 
!ancestorOwnerAndPermissionsForSetPermissionStep.containsKey(PathUtils.getPathWithoutSchemeAndAuthority(fileToCopy).toString())
+                && !targetFs.exists(fileToCopy)) {
+              List<OwnerAndPermission> ancestorsOwnerAndPermissionUpdated = 
new ArrayList<>();
+              OwnerAndPermission srcFileOwnerPermissionReplicatedForDest = new 
OwnerAndPermission(copyableFile.getDestinationOwnerAndPermission());
+              
ancestorsOwnerAndPermissionUpdated.add(srcFileOwnerPermissionReplicatedForDest);
+              
copyableFile.getAncestorsOwnerAndPermission().forEach(ancestorOwnPerm -> 
ancestorsOwnerAndPermissionUpdated.add(new 
OwnerAndPermission(ancestorOwnPerm)));
+              
ancestorOwnerAndPermissionsForSetPermissionStep.put(fileToCopy.toString(), 
ancestorsOwnerAndPermissionUpdated);
+            }
+
             // Always grab the parent since the above permission setting 
should be setting the permission for a folder itself
             // {@link CopyDataPublisher#preserveFileAttrInPublisher} will be 
setting the permission for the empty child dir
             Path fromPath = fileToCopy.getParent();
             // Avoid duplicate calculation for the same ancestor
             if (fromPath != null && 
!ancestorOwnerAndPermissions.containsKey(PathUtils.getPathWithoutSchemeAndAuthority(fromPath).toString())
 && !targetFs.exists(fromPath)) {
               ancestorOwnerAndPermissions.put(fromPath.toString(), 
copyableFile.getAncestorsOwnerAndPermission());
+              if 
(!ancestorOwnerAndPermissionsForSetPermissionStep.containsKey(PathUtils.getPathWithoutSchemeAndAuthority(fromPath).toString()))
 {
+                
ancestorOwnerAndPermissionsForSetPermissionStep.put(fromPath.toString(), 
copyableFile.getAncestorsOwnerAndPermission());
+              }
             }
 
             if (existOnTarget && srcFile.isFile()) {
@@ -167,7 +187,7 @@ public class ManifestBasedDataset implements 
IterableCopyableDataset {
       copyEntities.add(new PrePublishStep(datasetURN(), Maps.newHashMap(), 
createDirectoryWithPermissionsCommitStep, 1));
 
       if (this.enableSetPermissionPostPublish) {
-        for (Map.Entry<String, List<OwnerAndPermission>> 
recursiveParentPermissions : ancestorOwnerAndPermissions.entrySet()) {
+        for (Map.Entry<String, List<OwnerAndPermission>> 
recursiveParentPermissions : 
ancestorOwnerAndPermissionsForSetPermissionStep.entrySet()) {
           Path currentPath = new Path(recursiveParentPermissions.getKey());
           for (OwnerAndPermission ownerAndPermission : 
recursiveParentPermissions.getValue()) {
             // Ignore folders that already exist in destination, we assume 
that the publisher will re-sync those permissions if needed and
diff --git 
a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
 
b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
index d81de494d0..6acc668073 100644
--- 
a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
+++ 
b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.fs.permission.AclStatus;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.mockito.Mockito;
 import org.testng.Assert;
+import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
 import com.google.common.io.Files;
@@ -343,6 +344,77 @@ public class ManifestBasedDatasetFinderTest {
     Assert.assertEquals(fileSet.getFiles().size(), 2);  // 1 files to copy + 1 
pre publish step
   }
 
+  @DataProvider
+  public Object[][] dirPermissionWithExpectedSetPermissionStepCount() {
+    return new Object[][] {
+        {"d---------", 2},
+        {"drw-rw--w-", 2},
+        {"d-w-r----x", 2},
+        {"drwxrwxrwx", 1},
+        {"dr-xr-xr-x", 1},
+        {"d--x------", 1},
+    };
+  }
+
+  @Test(dataProvider = "dirPermissionWithExpectedSetPermissionStepCount")
+  public void 
testSetPermissionWhenCopyingDirectoryWithOwnerExecutePermissionSetUnset(String 
dirPermission, int expectedCount) throws IOException, URISyntaxException {
+    Path manifestPath = new 
Path(getClass().getClassLoader().getResource("manifestBasedDistcpTest/sampleManifestWithOnlyDirectory.json").getPath());
+    Properties props = new Properties();
+    props.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/");
+    props.setProperty("gobblin.copy.preserved.attributes", "rbugpvta");
+    try (FileSystem sourceFs = Mockito.mock(FileSystem.class);
+        FileSystem manifestReadFs = Mockito.mock(FileSystem.class);
+        FileSystem destFs = Mockito.mock(FileSystem.class)) {
+
+      setSourceAndDestFsMocks(sourceFs, destFs, manifestPath, manifestReadFs, 
false);
+
+      Mockito.when(destFs.exists(new Path("/tmp/dataset"))).thenReturn(false);
+      Mockito.when(destFs.exists(new Path("/tmp"))).thenReturn(false);
+
+      
Mockito.when(destFs.getFileStatus(any(Path.class))).thenReturn(localFs.getFileStatus(new
 Path(tmpDir.toString())));
+
+      setFsMockPathWithPermissions(sourceFs, "/tmp/dataset", dirPermission, 
"owner1", "group1", true);
+      setFsMockPathWithPermissions(sourceFs, "/tmp", "dr--r--r--", "owner2", 
"group2", true);
+
+      Iterator<FileSet<CopyEntity>> fileSets = new 
ManifestBasedDataset(sourceFs, manifestReadFs, manifestPath, 
props).getFileSetIterator(destFs,
+          CopyConfiguration.builder(destFs, props).build());
+
+      Assert.assertTrue(fileSets.hasNext());
+      FileSet<CopyEntity> fileSet = fileSets.next();
+      // 1 dir to copy + 1 pre publish step + 1 post publish step
+      Assert.assertEquals(fileSet.getFiles().size(),3);
+
+      CommitStep createDirectoryStep = ((PrePublishStep) 
fileSet.getFiles().get(1)).getStep();
+      Assert.assertTrue(createDirectoryStep instanceof 
CreateDirectoryWithPermissionsCommitStep);
+      Map<String, List<OwnerAndPermission>> pathAndPermissions = 
((CreateDirectoryWithPermissionsCommitStep) 
createDirectoryStep).getPathAndPermissions();
+      Assert.assertEquals(pathAndPermissions.size(), 1);
+
+      Assert.assertTrue(pathAndPermissions.containsKey("/tmp"));
+      Assert.assertEquals(pathAndPermissions.get("/tmp").size(), 1);
+      
Assert.assertEquals(pathAndPermissions.get("/tmp").get(0).getFsPermission(), 
FsPermission.valueOf("dr--r--r--"));
+      Assert.assertEquals(pathAndPermissions.get("/tmp").get(0).getOwner(), 
"owner2");
+      Assert.assertEquals(pathAndPermissions.get("/tmp").get(0).getGroup(), 
"group2");
+
+      CommitStep setPermissionStep = ((PostPublishStep) 
fileSet.getFiles().get(2)).getStep();
+      Assert.assertTrue(setPermissionStep instanceof SetPermissionCommitStep);
+      Map<String, OwnerAndPermission> ownerAndPermissionMap = 
((SetPermissionCommitStep) setPermissionStep).getPathAndPermissions();
+      Assert.assertEquals(ownerAndPermissionMap.size(), expectedCount);
+
+      List<String> sortedMapKeys = new 
ArrayList<>(ownerAndPermissionMap.keySet());
+      Assert.assertEquals(sortedMapKeys.get(0), "/tmp");
+      Assert.assertEquals(ownerAndPermissionMap.get("/tmp").getFsPermission(), 
FsPermission.valueOf("dr--r--r--"));
+      Assert.assertEquals(ownerAndPermissionMap.get("/tmp").getOwner(), 
"owner2");
+      Assert.assertEquals(ownerAndPermissionMap.get("/tmp").getGroup(), 
"group2");
+
+      if (expectedCount > 1) {
+        Assert.assertEquals(sortedMapKeys.get(1), "/tmp/dataset");
+        
Assert.assertEquals(ownerAndPermissionMap.get("/tmp/dataset").getFsPermission(),
 FsPermission.valueOf(dirPermission));
+        
Assert.assertEquals(ownerAndPermissionMap.get("/tmp/dataset").getOwner(), 
"owner1");
+        
Assert.assertEquals(ownerAndPermissionMap.get("/tmp/dataset").getGroup(), 
"group1");
+      }
+    }
+  }
+
   private void setSourceAndDestFsMocks(FileSystem sourceFs, FileSystem destFs, 
Path manifestPath, FileSystem manifestReadFs, boolean setFileStatusMock) throws 
IOException, URISyntaxException {
     URI SRC_FS_URI = new URI("source", "the.source.org", "/", null);
     URI MANIFEST_READ_FS_URI = new URI("manifest-read", 
"the.manifest-source.org", "/", null);
diff --git 
a/gobblin-data-management/src/test/resources/manifestBasedDistcpTest/sampleManifestWithOnlyDirectory.json
 
b/gobblin-data-management/src/test/resources/manifestBasedDistcpTest/sampleManifestWithOnlyDirectory.json
new file mode 100644
index 0000000000..5dd94ff064
--- /dev/null
+++ 
b/gobblin-data-management/src/test/resources/manifestBasedDistcpTest/sampleManifestWithOnlyDirectory.json
@@ -0,0 +1,8 @@
+[
+  {
+    "id":"1",
+    "fileName":"/tmp/dataset",
+    "fileGroup":"/tmp/dataset",
+    "fileSizeInBytes":"1024"
+  }
+]
\ No newline at end of file
diff --git 
a/gobblin-utility/src/main/java/org/apache/gobblin/util/filesystem/OwnerAndPermission.java
 
b/gobblin-utility/src/main/java/org/apache/gobblin/util/filesystem/OwnerAndPermission.java
index 7ae4cdc3fa..b14ab4fbe2 100644
--- 
a/gobblin-utility/src/main/java/org/apache/gobblin/util/filesystem/OwnerAndPermission.java
+++ 
b/gobblin-utility/src/main/java/org/apache/gobblin/util/filesystem/OwnerAndPermission.java
@@ -54,6 +54,22 @@ public class OwnerAndPermission implements Writable {
     this(owner, group, fsPermission, Lists.newArrayList());
   }
 
+
+  /**
+   * Copy constructor for {@link OwnerAndPermission}.
+   * <p>
+   * Creates a new instance by copying the values from another {@link 
OwnerAndPermission} object.
+   * Performs a deep copy of {@link FsPermission} and a shallow copy of the 
{@code aclEntries} list,
+   * since {@link org.apache.hadoop.fs.permission.AclEntry} is immutable.
+   * </p>
+   *
+   * @param other the {@code OwnerAndPermission} instance to copy from.
+   */
+  public OwnerAndPermission(OwnerAndPermission other) {
+    this(other.owner, other.group, new FsPermission(other.fsPermission),
+        other.aclEntries == null ? Lists.newArrayList() : 
Lists.newArrayList(other.aclEntries));
+  }
+
   @Override
   public void write(DataOutput dataOutput) throws IOException {
     Text.writeString(dataOutput, this.owner);
diff --git 
a/gobblin-utility/src/test/java/org/apache/gobblin/util/filesystem/OwnerAndPermissionTest.java
 
b/gobblin-utility/src/test/java/org/apache/gobblin/util/filesystem/OwnerAndPermissionTest.java
new file mode 100644
index 0000000000..d2276ee9d0
--- /dev/null
+++ 
b/gobblin-utility/src/test/java/org/apache/gobblin/util/filesystem/OwnerAndPermissionTest.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.util.filesystem;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.hadoop.fs.permission.AclEntry;
+import org.apache.hadoop.fs.permission.AclEntryScope;
+import org.apache.hadoop.fs.permission.AclEntryType;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+
+/** Tests for {@link OwnerAndPermission}*/
+public class OwnerAndPermissionTest {
+
+  @Test
+  public void testOwnerAndPermissionCopyConstructor() {
+    String owner = "testOwner";
+    String group = "testGroup";
+    FsPermission permission = new FsPermission((short) 0755);
+    AclEntry aclEntry = new AclEntry.Builder()
+        .setPermission(FsAction.READ_WRITE)
+        .setName("test-acl")
+        .setScope(AclEntryScope.DEFAULT)
+        .setType(AclEntryType.GROUP)
+        .build();
+    List<AclEntry> aclEntries = Collections.singletonList(aclEntry);
+
+    OwnerAndPermission ownerAndPermission = new OwnerAndPermission(owner, 
group, permission, aclEntries);
+    OwnerAndPermission copyOwnerAndPermission = new 
OwnerAndPermission(ownerAndPermission);
+
+    Assert.assertEquals(copyOwnerAndPermission.getOwner(), owner);
+    Assert.assertEquals(copyOwnerAndPermission.getGroup(), group);
+    Assert.assertEquals(copyOwnerAndPermission.getFsPermission(), permission);
+    Assert.assertEquals(copyOwnerAndPermission.getAclEntries(), aclEntries);
+
+    Assert.assertNotSame(ownerAndPermission, copyOwnerAndPermission);
+    Assert.assertNotSame(ownerAndPermission.getFsPermission(), 
copyOwnerAndPermission.getFsPermission());
+    Assert.assertNotSame(ownerAndPermission.getAclEntries(), 
copyOwnerAndPermission.getAclEntries());
+  }
+}

Reply via email to