hadoop-yetus commented on a change in pull request #919: HADOOP-16158. DistCp to support checksum validation when copy blocks in parallel
URL: https://github.com/apache/hadoop/pull/919#discussion_r291438906
 
 

 ##########
 File path: hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java
 ##########
 @@ -338,6 +354,142 @@ public void testAtomicCommitExistingFinal() throws IOException {
     }
   }
 
+  @Test
+  public void testCommitWithChecksumMismatchAndSkipCrc() throws IOException {
+    testCommitWithChecksumMismatch(true);
+  }
+
+  @Test
+  public void testCommitWithChecksumMismatchWithoutSkipCrc()
+      throws IOException {
+    testCommitWithChecksumMismatch(false);
+  }
+
+  private void testCommitWithChecksumMismatch(boolean skipCrc)
+      throws IOException {
+
+    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
+    JobContext jobContext = new JobContextImpl(
+        taskAttemptContext.getConfiguration(),
+        taskAttemptContext.getTaskAttemptID().getJobID());
+    Configuration conf = jobContext.getConfiguration();
+
+    String sourceBase;
+    String targetBase;
+    FileSystem fs = null;
+    try {
+      fs = FileSystem.get(conf);
+      sourceBase = "/tmp1/" + String.valueOf(rand.nextLong());
+      targetBase = "/tmp1/" + String.valueOf(rand.nextLong());
+
+      int blocksPerChunk = 5;
+      String srcFilename = "/srcdata";
+      createSrcAndWorkFilesWithDifferentChecksum(fs, targetBase, sourceBase,
+          srcFilename, blocksPerChunk);
+
+      DistCpOptions options = new DistCpOptions.Builder(
+          Collections.singletonList(new Path(sourceBase)),
+          new Path("/out"))
+          .withBlocksPerChunk(blocksPerChunk)
+          .withCRC(skipCrc)
+          .build();
+      options.appendToConf(conf);
+      conf.setBoolean(
+          DistCpConstants.CONF_LABEL_SIMPLE_LISTING_RANDOMIZE_FILES, false);
+      DistCpContext context = new DistCpContext(options);
+      context.setTargetPathExists(false);
+
+      CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
+      Path listingFile = new Path("/tmp1/"
+          + String.valueOf(rand.nextLong()));
+      listing.buildListing(listingFile, context);
+
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);
+
+      OutputCommitter committer = new CopyCommitter(
+          null, taskAttemptContext);
+      try {
+        committer.commitJob(jobContext);
+        
 
 Review comment:
   whitespace: the added line above contains only trailing whitespace (whitespace at end of line)
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to