This is an automated email from the ASF dual-hosted git repository.
wlo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new e79375a55 [GOBBLIN-2043] Set execute bit only for new folders in
manifest distcp (#3922)
e79375a55 is described below
commit e79375a555e96b6df2a79b7bb1a5b7fa309d1538
Author: William Lo <[email protected]>
AuthorDate: Wed Apr 17 15:48:53 2024 -0400
[GOBBLIN-2043] Set execute bit only for new folders in manifest distcp
(#3922)
Set execute bit only for new folders in manifest distcp
---
.../data/management/copy/ManifestBasedDataset.java | 21 +++++++++++++++++----
.../util/commit/SetPermissionCommitStep.java | 2 ++
.../dataset/ManifestBasedDatasetFinderTest.java | 3 +++
3 files changed, 22 insertions(+), 4 deletions(-)
diff --git
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
index b81d59663..c6823d39b 100644
---
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
+++
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/ManifestBasedDataset.java
@@ -26,10 +26,12 @@ import com.google.gson.JsonIOException;
import com.google.gson.JsonSyntaxException;
import java.io.IOException;
import java.util.Collections;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
+import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;
import lombok.extern.slf4j.Slf4j;
@@ -37,6 +39,7 @@ import org.apache.gobblin.commit.CommitStep;
import org.apache.gobblin.data.management.copy.entities.PostPublishStep;
import org.apache.gobblin.data.management.copy.entities.PrePublishStep;
import org.apache.gobblin.data.management.partition.FileSet;
+import org.apache.gobblin.util.PathUtils;
import org.apache.gobblin.util.commit.DeleteFileCommitStep;
import org.apache.gobblin.util.commit.SetPermissionCommitStep;
@@ -130,10 +133,12 @@ public class ManifestBasedDataset implements
IterableCopyableDataset {
copyEntities.add(copyableFile);
Path fromPath = srcFs.getFileStatus(fileToCopy).isDirectory() ?
fileToCopy : fileToCopy.getParent();
-
- ancestorOwnerAndPermissions.putAll(
-
CopyableFile.resolveReplicatedAncestorOwnerAndPermissionsRecursively(srcFs,
fromPath,
- new Path(commonFilesParent), configuration));
+ // Avoid duplicate calculation for the same ancestor
+ if
(!ancestorOwnerAndPermissions.containsKey(PathUtils.getPathWithoutSchemeAndAuthority(fromPath).toString()))
{
+ ancestorOwnerAndPermissions.putAll(
+
CopyableFile.resolveReplicatedAncestorOwnerAndPermissionsRecursively(srcFs,
fromPath,
+ new Path(commonFilesParent), configuration));
+ }
if (existOnTarget && srcFile.isFile()) {
// this is to match the existing publishing behavior where we
won't rewrite the target when it's already existed
@@ -146,6 +151,14 @@ public class ManifestBasedDataset implements
IterableCopyableDataset {
}
}
+ // Only set permission for newly created folders on target
+ // To change permissions for existing dirs, expectation is to add the
folder to the manifest
+ Set<String> parentFolders = new
HashSet<>(ancestorOwnerAndPermissions.keySet());
+ for (String folder : parentFolders) {
+ if (targetFs.exists(new Path(folder))) {
+ ancestorOwnerAndPermissions.remove(folder);
+ }
+ }
Properties props = new Properties();
props.setProperty(SetPermissionCommitStep.STOP_ON_ERROR_KEY, "true");
CommitStep setPermissionCommitStep = new
SetPermissionCommitStep(targetFs, ancestorOwnerAndPermissions, props);
diff --git
a/gobblin-data-management/src/main/java/org/apache/gobblin/util/commit/SetPermissionCommitStep.java
b/gobblin-data-management/src/main/java/org/apache/gobblin/util/commit/SetPermissionCommitStep.java
index dd1a68bb4..0887c05bf 100644
---
a/gobblin-data-management/src/main/java/org/apache/gobblin/util/commit/SetPermissionCommitStep.java
+++
b/gobblin-data-management/src/main/java/org/apache/gobblin/util/commit/SetPermissionCommitStep.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.AccessControlException;
+import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.apache.gobblin.commit.CommitStep;
@@ -38,6 +39,7 @@ import
org.apache.gobblin.data.management.copy.OwnerAndPermission;
*/
@Slf4j
public class SetPermissionCommitStep implements CommitStep {
+ @Getter
Map<String, OwnerAndPermission> pathAndPermissions;
private final URI fsUri;
public final boolean stopOnError;
diff --git
a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
index a0dbcacad..1adbe9ea3 100644
---
a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
+++
b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/dataset/ManifestBasedDatasetFinderTest.java
@@ -93,6 +93,8 @@ public class ManifestBasedDatasetFinderTest {
FileSet<CopyEntity> fileSet = fileSets.next();
Assert.assertEquals(fileSet.getFiles().size(), 3); // 2 files to copy +
1 post publish step
Assert.assertTrue(((PostPublishStep)
fileSet.getFiles().get(2)).getStep() instanceof SetPermissionCommitStep);
+ SetPermissionCommitStep step = (SetPermissionCommitStep)
((PostPublishStep) fileSet.getFiles().get(2)).getStep();
+ Assert.assertEquals(step.getPathAndPermissions().keySet().size(), 2);
Mockito.verify(manifestReadFs, Mockito.times(1)).exists(manifestPath);
Mockito.verify(manifestReadFs,
Mockito.times(1)).getFileStatus(manifestPath);
Mockito.verify(manifestReadFs, Mockito.times(1)).open(manifestPath);
@@ -178,6 +180,7 @@ public class ManifestBasedDatasetFinderTest {
Mockito.when(sourceFs.getUri()).thenReturn(SRC_FS_URI);
Mockito.when(manifestReadFs.getUri()).thenReturn(MANIFEST_READ_FS_URI);
Mockito.when(destFs.getUri()).thenReturn(DEST_FS_URI);
+ Mockito.when(destFs.exists(new Path("/tmp"))).thenReturn(true);
Mockito.when(sourceFs.getFileStatus(any(Path.class))).thenReturn(localFs.getFileStatus(new
Path(tmpDir.toString())));
Mockito.when(sourceFs.exists(any(Path.class))).thenReturn(true);
Mockito.when(manifestReadFs.exists(any(Path.class))).thenReturn(true);