This is an automated email from the ASF dual-hosted git repository.
wlo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new d9874f16b [GOBBLIN-2072] Fix permission issue where child dir gets the
incorrect permission se… (#3955)
d9874f16b is described below
commit d9874f16b73ef93d51bb880581d28ce43b46219e
Author: William Lo <[email protected]>
AuthorDate: Wed Jun 5 14:47:05 2024 -0400
[GOBBLIN-2072] Fix permission issue where child dir gets the incorrect
permission se… (#3955)
Fix permission issue where child dir gets the incorrect permission set in
setpermissionstep
---
.../gobblin/data/management/copy/CopyableFile.java | 4 ++++
.../data/management/copy/ManifestBasedDataset.java | 6 +++--
.../dataset/ManifestBasedDatasetFinderTest.java | 28 +++++++++++++++++++++-
.../manifestRootDirEmpty.json | 8 +++++++
4 files changed, 43 insertions(+), 3 deletions(-)
diff --git
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/CopyableFile.java
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/CopyableFile.java
index 9e9af05e3..1792f354d 100644
---
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/CopyableFile.java
+++
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/CopyableFile.java
@@ -440,6 +440,10 @@ public class CopyableFile extends CopyEntity implements
File {
Path toPath, CopyConfiguration copyConfiguration, Cache<String,
OwnerAndPermission> permissionMap)
throws IOException, ExecutionException {
+ if (fromPath == null) {
+ return Lists.newArrayList();
+ }
+
if (!PathUtils.isAncestor(toPath, fromPath)) {
throw new IOException(String.format("toPath %s must be an ancestor of
fromPath %s.", toPath, fromPath));
}
diff --git
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
index c6823d39b..013012041 100644
---
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
+++
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
@@ -132,9 +132,11 @@ public class ManifestBasedDataset implements
IterableCopyableDataset {
copyableFile.setFsDatasets(srcFs, targetFs);
copyEntities.add(copyableFile);
- Path fromPath = srcFs.getFileStatus(fileToCopy).isDirectory() ?
fileToCopy : fileToCopy.getParent();
+ // Always grab the parent since the above permission setting
should be setting the permission for a folder itself
+ // {@link CopyDataPublisher#preserveFileAttrInPublisher} will be
setting the permission for the empty child dir
+ Path fromPath = fileToCopy.getParent();
// Avoid duplicate calculation for the same ancestor
- if
(!ancestorOwnerAndPermissions.containsKey(PathUtils.getPathWithoutSchemeAndAuthority(fromPath).toString()))
{
+ if (fromPath != null &&
!ancestorOwnerAndPermissions.containsKey(PathUtils.getPathWithoutSchemeAndAuthority(fromPath).toString()))
{
ancestorOwnerAndPermissions.putAll(
CopyableFile.resolveReplicatedAncestorOwnerAndPermissionsRecursively(srcFs,
fromPath,
new Path(commonFilesParent), configuration));
diff --git
a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
index 1adbe9ea3..16ded2b79 100644
---
a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
+++
b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
@@ -94,7 +94,8 @@ public class ManifestBasedDatasetFinderTest {
Assert.assertEquals(fileSet.getFiles().size(), 3); // 2 files to copy +
1 post publish step
Assert.assertTrue(((PostPublishStep)
fileSet.getFiles().get(2)).getStep() instanceof SetPermissionCommitStep);
SetPermissionCommitStep step = (SetPermissionCommitStep)
((PostPublishStep) fileSet.getFiles().get(2)).getStep();
- Assert.assertEquals(step.getPathAndPermissions().keySet().size(), 2);
+
+ Assert.assertEquals(step.getPathAndPermissions().keySet().size(), 1); //
SetPermissionCommitStep only applies to ancestors
Mockito.verify(manifestReadFs, Mockito.times(1)).exists(manifestPath);
Mockito.verify(manifestReadFs,
Mockito.times(1)).getFileStatus(manifestPath);
Mockito.verify(manifestReadFs, Mockito.times(1)).open(manifestPath);
@@ -173,6 +174,31 @@ public class ManifestBasedDatasetFinderTest {
}
}
+ @Test
+ public void testFindDatasetEmptyRoot() throws Exception {
+ //Get manifest Path
+ String manifestLocation =
getClass().getClassLoader().getResource("manifestBasedDistcpTest/manifestRootDirEmpty.json").getPath();
+
+ // Test manifestDatasetFinder
+ Properties props = new Properties();
+ props.setProperty("gobblin.copy.manifestBased.manifest.location",
manifestLocation);
+ props.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/");
+ ManifestBasedDatasetFinder finder = new
ManifestBasedDatasetFinder(localFs, props);
+ List<ManifestBasedDataset> datasets = finder.findDatasets();
+ Assert.assertEquals(datasets.size(), 1);
+ FileSystem sourceFs = Mockito.mock(FileSystem.class);
+ FileSystem manifestReadFs = Mockito.mock(FileSystem.class);
+ FileSystem destFs = Mockito.mock(FileSystem.class);
+ Path manifestPath = new Path(manifestLocation);
+ setSourceAndDestFsMocks(sourceFs, destFs, manifestPath, manifestReadFs);
+ Iterator<FileSet<CopyEntity>> fileSets = new
ManifestBasedDataset(sourceFs, manifestReadFs, manifestPath,
props).getFileSetIterator(destFs,
+ CopyConfiguration.builder(destFs, props).build());
+ Assert.assertTrue(fileSets.hasNext());
+ FileSet<CopyEntity> fileSet = fileSets.next();
+ Assert.assertEquals(fileSet.getFiles().size(), 2); // 1 files to copy + 1
post publish step
+ Assert.assertTrue(((PostPublishStep) fileSet.getFiles().get(1)).getStep()
instanceof SetPermissionCommitStep);
+ }
+
private void setSourceAndDestFsMocks(FileSystem sourceFs, FileSystem destFs,
Path manifestPath, FileSystem manifestReadFs) throws IOException,
URISyntaxException {
URI SRC_FS_URI = new URI("source", "the.source.org", "/", null);
URI MANIFEST_READ_FS_URI = new URI("manifest-read",
"the.manifest-source.org", "/", null);
diff --git
a/gobblin-data-management/src/test/resources/manifestBasedDistcpTest/manifestRootDirEmpty.json
b/gobblin-data-management/src/test/resources/manifestBasedDistcpTest/manifestRootDirEmpty.json
new file mode 100644
index 000000000..c9d59cae5
--- /dev/null
+++
b/gobblin-data-management/src/test/resources/manifestBasedDistcpTest/manifestRootDirEmpty.json
@@ -0,0 +1,8 @@
+[
+ {
+ "id":"1",
+ "fileName":"/",
+ "fileGroup":"/",
+ "fileSizeInBytes":"0"
+ }
+]
\ No newline at end of file