This is an automated email from the ASF dual-hosted git repository.

wlo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new d9874f16b [GOBBLIN-2072] Fix permission issue where child dir gets the 
incorrect permission se… (#3955)
d9874f16b is described below

commit d9874f16b73ef93d51bb880581d28ce43b46219e
Author: William Lo <[email protected]>
AuthorDate: Wed Jun 5 14:47:05 2024 -0400

    [GOBBLIN-2072] Fix permission issue where child dir gets the incorrect 
permission se… (#3955)
    
    Fix permission issue where child dir gets the incorrect permission set in 
setpermissionstep
---
 .../gobblin/data/management/copy/CopyableFile.java |  4 ++++
 .../data/management/copy/ManifestBasedDataset.java |  6 +++--
 .../dataset/ManifestBasedDatasetFinderTest.java    | 28 +++++++++++++++++++++-
 .../manifestRootDirEmpty.json                      |  8 +++++++
 4 files changed, 43 insertions(+), 3 deletions(-)

diff --git 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/CopyableFile.java
 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/CopyableFile.java
index 9e9af05e3..1792f354d 100644
--- 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/CopyableFile.java
+++ 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/CopyableFile.java
@@ -440,6 +440,10 @@ public class CopyableFile extends CopyEntity implements 
File {
       Path toPath, CopyConfiguration copyConfiguration, Cache<String, 
OwnerAndPermission> permissionMap)
       throws IOException, ExecutionException {
 
+    if (fromPath == null) {
+      return Lists.newArrayList();
+    }
+
     if (!PathUtils.isAncestor(toPath, fromPath)) {
       throw new IOException(String.format("toPath %s must be an ancestor of 
fromPath %s.", toPath, fromPath));
     }
diff --git 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
index c6823d39b..013012041 100644
--- 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
+++ 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
@@ -132,9 +132,11 @@ public class ManifestBasedDataset implements 
IterableCopyableDataset {
             copyableFile.setFsDatasets(srcFs, targetFs);
             copyEntities.add(copyableFile);
 
-            Path fromPath = srcFs.getFileStatus(fileToCopy).isDirectory() ? 
fileToCopy : fileToCopy.getParent();
+            // Always grab the parent since the above permission setting 
should be setting the permission for a folder itself
+            // {@link CopyDataPublisher#preserveFileAttrInPublisher} will be 
setting the permission for the empty child dir
+            Path fromPath = fileToCopy.getParent();
             // Avoid duplicate calculation for the same ancestor
-            if 
(!ancestorOwnerAndPermissions.containsKey(PathUtils.getPathWithoutSchemeAndAuthority(fromPath).toString()))
 {
+            if (fromPath != null && 
!ancestorOwnerAndPermissions.containsKey(PathUtils.getPathWithoutSchemeAndAuthority(fromPath).toString()))
 {
               ancestorOwnerAndPermissions.putAll(
                   
CopyableFile.resolveReplicatedAncestorOwnerAndPermissionsRecursively(srcFs, 
fromPath,
                       new Path(commonFilesParent), configuration));
diff --git 
a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
 
b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
index 1adbe9ea3..16ded2b79 100644
--- 
a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
+++ 
b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
@@ -94,7 +94,8 @@ public class ManifestBasedDatasetFinderTest {
       Assert.assertEquals(fileSet.getFiles().size(), 3);  // 2 files to copy + 
1 post publish step
       Assert.assertTrue(((PostPublishStep) 
fileSet.getFiles().get(2)).getStep() instanceof SetPermissionCommitStep);
       SetPermissionCommitStep step = (SetPermissionCommitStep) 
((PostPublishStep) fileSet.getFiles().get(2)).getStep();
-      Assert.assertEquals(step.getPathAndPermissions().keySet().size(), 2);
+
+      Assert.assertEquals(step.getPathAndPermissions().keySet().size(), 1); // 
SetPermissionCommitStep only applies to ancestors
       Mockito.verify(manifestReadFs, Mockito.times(1)).exists(manifestPath);
       Mockito.verify(manifestReadFs, 
Mockito.times(1)).getFileStatus(manifestPath);
       Mockito.verify(manifestReadFs, Mockito.times(1)).open(manifestPath);
@@ -173,6 +174,31 @@ public class ManifestBasedDatasetFinderTest {
     }
   }
 
+  @Test
+  public void testFindDatasetEmptyRoot() throws Exception {
+    //Get manifest Path
+    String manifestLocation = 
getClass().getClassLoader().getResource("manifestBasedDistcpTest/manifestRootDirEmpty.json").getPath();
+
+    // Test manifestDatasetFinder
+    Properties props = new Properties();
+    props.setProperty("gobblin.copy.manifestBased.manifest.location", 
manifestLocation);
+    props.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/");
+    ManifestBasedDatasetFinder finder = new 
ManifestBasedDatasetFinder(localFs, props);
+    List<ManifestBasedDataset> datasets = finder.findDatasets();
+    Assert.assertEquals(datasets.size(), 1);
+    FileSystem sourceFs = Mockito.mock(FileSystem.class);
+    FileSystem manifestReadFs = Mockito.mock(FileSystem.class);
+    FileSystem destFs = Mockito.mock(FileSystem.class);
+    Path manifestPath = new Path(manifestLocation);
+    setSourceAndDestFsMocks(sourceFs, destFs, manifestPath, manifestReadFs);
+    Iterator<FileSet<CopyEntity>> fileSets = new 
ManifestBasedDataset(sourceFs, manifestReadFs, manifestPath, 
props).getFileSetIterator(destFs,
+        CopyConfiguration.builder(destFs, props).build());
+    Assert.assertTrue(fileSets.hasNext());
+    FileSet<CopyEntity> fileSet = fileSets.next();
+    Assert.assertEquals(fileSet.getFiles().size(), 2);  // 1 files to copy + 1 
post publish step
+    Assert.assertTrue(((PostPublishStep) fileSet.getFiles().get(1)).getStep() 
instanceof SetPermissionCommitStep);
+  }
+
   private void setSourceAndDestFsMocks(FileSystem sourceFs, FileSystem destFs, 
Path manifestPath, FileSystem manifestReadFs) throws IOException, 
URISyntaxException {
     URI SRC_FS_URI = new URI("source", "the.source.org", "/", null);
     URI MANIFEST_READ_FS_URI = new URI("manifest-read", 
"the.manifest-source.org", "/", null);
diff --git 
a/gobblin-data-management/src/test/resources/manifestBasedDistcpTest/manifestRootDirEmpty.json
 
b/gobblin-data-management/src/test/resources/manifestBasedDistcpTest/manifestRootDirEmpty.json
new file mode 100644
index 000000000..c9d59cae5
--- /dev/null
+++ 
b/gobblin-data-management/src/test/resources/manifestBasedDistcpTest/manifestRootDirEmpty.json
@@ -0,0 +1,8 @@
+[
+  {
+    "id":"1",
+    "fileName":"/",
+    "fileGroup":"/",
+    "fileSizeInBytes":"0"
+  }
+]
\ No newline at end of file

Reply via email to