sadanand48 commented on code in PR #5885:
URL: https://github.com/apache/hadoop/pull/5885#discussion_r1274102593
##########
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java:
##########
@@ -305,6 +305,33 @@ public static CopyListing getCopyListing(Configuration
configuration,
}
}
+ /**
+ * Public Factory method with which the appropriate Diff CopyListing implementation may be retrieved.
+ * @param configuration The input configuration.
+ * @param credentials Credentials object on which the FS delegation tokens are cached
+ * @param distCpSync DistcpSync object used to sync diffs between source and target.
+ * @return An instance of the appropriate CopyListing implementation.
+ * @throws java.io.IOException - Exception if any
+ */
+ public static CopyListing getDiffCopyListing(Configuration configuration,
+ Credentials credentials, DistCpSync distCpSync) throws IOException {
+ String copyListingClassName =
+ configuration.get(DistCpConstants.CONF_LABEL_DIFF_COPY_LISTING_CLASS,
+ "");
+ try {
+ Class<? extends CopyListing> copyListingClass =
+
configuration.getClass(DistCpConstants.CONF_LABEL_DIFF_COPY_LISTING_CLASS,
+ SimpleCopyListing.class, SimpleCopyListing.class);
+ copyListingClassName = copyListingClass.getName();
+ Constructor<? extends CopyListing> constructor =
+ copyListingClass.getDeclaredConstructor(Configuration.class,
+ Credentials.class, DistCpSync.class);
+ return constructor.newInstance(configuration, credentials,distCpSync);
Review Comment:
Done.
##########
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListing.java:
##########
@@ -167,6 +169,66 @@ public void testDuplicates() {
}
}
+ @Test(timeout=10000)
+ public void testFlatDiffCopyListing() {
+ FileSystem fs = null;
+ try {
+ fs = FileSystem.get(getConf());
+ List<Path> srcPaths = new ArrayList<Path>();
+ srcPaths.add(new Path("/tmp/in"));
+ TestDistCpUtils.createFile(fs, "/tmp/in/src1/1.txt");
+ TestDistCpUtils.createFile(fs, "/tmp/in/src2/1.txt");
+ TestDistCpUtils.createFile(fs, "/tmp/in/src3/3.txt");
+ TestDistCpUtils.createFile(fs, "/tmp/in/src3/4.txt");
+ Path target = new Path("/tmp/out");
+ Path listingFile = new Path("/tmp/list");
+ // adding below flags useDiff & sync only to enable context.shouldUseSnapshotDiff()
+ final DistCpOptions options = new DistCpOptions.Builder(srcPaths, target)
+ .withUseDiff("snap1","snap2")
+ .withSyncFolder(true)
+ .build();
+ final DistCpContext context = new DistCpContext(options);
+ Configuration configuration = getConf();
+ configuration.set(DistCpConstants.CONF_LABEL_DIFF_COPY_LISTING_CLASS,
+ FlatDiffCopyListing.class.getName());
+ DistCpSync distCpSync = Mockito.mock(DistCpSync.class);
+ // Create a dummy DiffInfo List that contains a directory + paths inside
+ // that directory as part of the diff.
+
+ ArrayList<DiffInfo> diffs = new ArrayList<>();
+ diffs.add(
+ new DiffInfo(new Path("/tmp/in/src3/"), new Path("/tmp/in/src3/"),
+ SnapshotDiffReport.DiffType.CREATE));
+ diffs.add(new DiffInfo(new Path("/tmp/in/src3/3.txt"),
+ new Path("/tmp/in/src3/3.txt"), SnapshotDiffReport.DiffType.CREATE));
+ diffs.add(new DiffInfo(new Path("/tmp/in/src3/4.txt"),
+ new Path("/tmp/in/src3/4.txt"), SnapshotDiffReport.DiffType.CREATE));
+
Mockito.when(distCpSync.prepareDiffListForCopyListing()).thenReturn(diffs);
+
+ CopyListing listing =
+ CopyListing.getDiffCopyListing(configuration,
CREDENTIALS,distCpSync);
+ // won't throw DuplicateFileException as copyListing is FlatDiffCopyListing.
+ listing.buildListing(listingFile, context);
+
+ // Throws DuplicateFileException when copyListing is SimpleCopyListing
+ // as it recursively traverses src3 directory and also adds 3.txt,4.txt twice
+
configuration.set(DistCpConstants.CONF_LABEL_DIFF_COPY_LISTING_CLASS,SimpleCopyListing.class.getName());
+ try{
+ listing =
+ CopyListing.getDiffCopyListing(configuration,
CREDENTIALS,distCpSync);
Review Comment:
Done.
##########
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListing.java:
##########
@@ -167,6 +169,66 @@ public void testDuplicates() {
}
}
+ @Test(timeout=10000)
+ public void testFlatDiffCopyListing() {
+ FileSystem fs = null;
+ try {
+ fs = FileSystem.get(getConf());
+ List<Path> srcPaths = new ArrayList<Path>();
+ srcPaths.add(new Path("/tmp/in"));
+ TestDistCpUtils.createFile(fs, "/tmp/in/src1/1.txt");
+ TestDistCpUtils.createFile(fs, "/tmp/in/src2/1.txt");
+ TestDistCpUtils.createFile(fs, "/tmp/in/src3/3.txt");
+ TestDistCpUtils.createFile(fs, "/tmp/in/src3/4.txt");
+ Path target = new Path("/tmp/out");
+ Path listingFile = new Path("/tmp/list");
+ // adding below flags useDiff & sync only to enable context.shouldUseSnapshotDiff()
+ final DistCpOptions options = new DistCpOptions.Builder(srcPaths, target)
+ .withUseDiff("snap1","snap2")
+ .withSyncFolder(true)
+ .build();
+ final DistCpContext context = new DistCpContext(options);
+ Configuration configuration = getConf();
+ configuration.set(DistCpConstants.CONF_LABEL_DIFF_COPY_LISTING_CLASS,
+ FlatDiffCopyListing.class.getName());
+ DistCpSync distCpSync = Mockito.mock(DistCpSync.class);
+ // Create a dummy DiffInfo List that contains a directory + paths inside
+ // that directory as part of the diff.
+
+ ArrayList<DiffInfo> diffs = new ArrayList<>();
+ diffs.add(
+ new DiffInfo(new Path("/tmp/in/src3/"), new Path("/tmp/in/src3/"),
+ SnapshotDiffReport.DiffType.CREATE));
+ diffs.add(new DiffInfo(new Path("/tmp/in/src3/3.txt"),
+ new Path("/tmp/in/src3/3.txt"), SnapshotDiffReport.DiffType.CREATE));
+ diffs.add(new DiffInfo(new Path("/tmp/in/src3/4.txt"),
+ new Path("/tmp/in/src3/4.txt"), SnapshotDiffReport.DiffType.CREATE));
+
Mockito.when(distCpSync.prepareDiffListForCopyListing()).thenReturn(diffs);
+
+ CopyListing listing =
+ CopyListing.getDiffCopyListing(configuration,
CREDENTIALS,distCpSync);
+ // won't throw DuplicateFileException as copyListing is FlatDiffCopyListing.
+ listing.buildListing(listingFile, context);
+
+ // Throws DuplicateFileException when copyListing is SimpleCopyListing
+ // as it recursively traverses src3 directory and also adds 3.txt,4.txt twice
+
configuration.set(DistCpConstants.CONF_LABEL_DIFF_COPY_LISTING_CLASS,SimpleCopyListing.class.getName());
+ try{
+ listing =
+ CopyListing.getDiffCopyListing(configuration,
CREDENTIALS,distCpSync);
+ listing.buildListing(listingFile, context);
Review Comment:
Done.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]