[ 
https://issues.apache.org/jira/browse/HADOOP-18582?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17651788#comment-17651788
 ] 

ASF GitHub Bot commented on HADOOP-18582:
-----------------------------------------

steveloughran commented on code in PR #5251:
URL: https://github.com/apache/hadoop/pull/5251#discussion_r1056821189


##########
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java:
##########
@@ -580,6 +580,72 @@ fs, new Path(sourceBase + srcFilename), null,
     }
   }
 
+  @Test
+  public void testCommitWithCleanupTempFiles() throws IOException {
+    testCommitWithCleanup(true, false);
+    testCommitWithCleanup(false, true);
+    testCommitWithCleanup(true, true);
+    testCommitWithCleanup(false, false);
+  }
+
+  private void testCommitWithCleanup(boolean append, boolean 
directWrite)throws IOException {
+    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
+    JobID jobID = taskAttemptContext.getTaskAttemptID().getJobID();
+    JobContext jobContext = new JobContextImpl(
+        taskAttemptContext.getConfiguration(),
+        jobID);
+    Configuration conf = jobContext.getConfiguration();
+
+    String sourceBase;
+    String targetBase;
+    FileSystem fs = null;
+    try {
+      fs = FileSystem.get(conf);
+      sourceBase = "/tmp1/" + rand.nextLong();
+      targetBase = "/tmp1/" + rand.nextLong();
+
+      DistCpOptions options = new DistCpOptions.Builder(
+          Collections.singletonList(new Path(sourceBase)),
+          new Path("/out"))
+          .withAppend(append)
+          .withSyncFolder(true)
+          .withDirectWrite(directWrite)
+          .build();
+      options.appendToConf(conf);
+
+      DistCpContext context = new DistCpContext(options);
+      context.setTargetPathExists(false);
+
+
+      conf.set(CONF_LABEL_TARGET_WORK_PATH, targetBase);
+      conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase);
+
+      Path tempFilePath = getTempFile(targetBase, taskAttemptContext);
+      createDirectory(fs, tempFilePath);
+
+      OutputCommitter committer = new CopyCommitter(
+          null, taskAttemptContext);
+      committer.commitJob(jobContext);
+
+      if (append || directWrite) {
+        Assert.assertTrue(fs.exists(tempFilePath));

Review Comment:
   use assertions in `ContractTestUtils`, e.g `assertIsDirectory()`, 
`assertIsFile()` so we get detailed error messages on failure, rather than just 
"assert false"



##########
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java:
##########
@@ -580,6 +580,72 @@ fs, new Path(sourceBase + srcFilename), null,
     }
   }
 
+  @Test
+  public void testCommitWithCleanupTempFiles() throws IOException {
+    testCommitWithCleanup(true, false);
+    testCommitWithCleanup(false, true);
+    testCommitWithCleanup(true, true);
+    testCommitWithCleanup(false, false);
+  }
+
+  private void testCommitWithCleanup(boolean append, boolean 
directWrite)throws IOException {
+    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
+    JobID jobID = taskAttemptContext.getTaskAttemptID().getJobID();
+    JobContext jobContext = new JobContextImpl(
+        taskAttemptContext.getConfiguration(),
+        jobID);
+    Configuration conf = jobContext.getConfiguration();
+
+    String sourceBase;
+    String targetBase;
+    FileSystem fs = null;
+    try {
+      fs = FileSystem.get(conf);
+      sourceBase = "/tmp1/" + rand.nextLong();
+      targetBase = "/tmp1/" + rand.nextLong();
+
+      DistCpOptions options = new DistCpOptions.Builder(
+          Collections.singletonList(new Path(sourceBase)),
+          new Path("/out"))
+          .withAppend(append)
+          .withSyncFolder(true)
+          .withDirectWrite(directWrite)
+          .build();
+      options.appendToConf(conf);
+
+      DistCpContext context = new DistCpContext(options);
+      context.setTargetPathExists(false);
+
+
+      conf.set(CONF_LABEL_TARGET_WORK_PATH, targetBase);
+      conf.set(CONF_LABEL_TARGET_FINAL_PATH, targetBase);
+
+      Path tempFilePath = getTempFile(targetBase, taskAttemptContext);
+      createDirectory(fs, tempFilePath);
+
+      OutputCommitter committer = new CopyCommitter(
+          null, taskAttemptContext);
+      committer.commitJob(jobContext);
+
+      if (append || directWrite) {
+        Assert.assertTrue(fs.exists(tempFilePath));
+      }else {
+        Assert.assertFalse(fs.exists(tempFilePath));

Review Comment:
   use `assertPathDoesNotExist()`





> No need to clean tmp files in ditcp direct mode
> -----------------------------------------------
>
>                 Key: HADOOP-18582
>                 URL: https://issues.apache.org/jira/browse/HADOOP-18582
>             Project: Hadoop Common
>          Issue Type: Bug
>          Components: tools/distcp
>    Affects Versions: 3.3.4
>            Reporter: 10000kang
>            Assignee: 10000kang
>            Priority: Major
>              Labels: pull-request-available
>
> it not necessary to do `cleanupTempFiles`  while ditcp commit job in direct  
> mode, because it there is no temp files in direct mode.
> This clean operation will increase the task execution time, because it will 
> get the list of files in the target path. When the number of files in the 
> target path is very large, this operation will be very slow.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to