abstractdog commented on code in PR #3836:
URL: https://github.com/apache/hive/pull/3836#discussion_r1066913288


##########
ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java:
##########
@@ -162,6 +166,59 @@ public void testNonAcidDynamicPartitioning() throws 
Exception {
     confirmOutput(DataFormat.WITH_PARTITION_VALUE);
   }
 
+  @Test
+  public void testNonAcidRemoveDuplicate() throws Exception {
+    setBasePath("writeDuplicate");
+    setupData(DataFormat.WITH_PARTITION_VALUE);
+
+    FileSinkDesc desc = (FileSinkDesc) 
getFileSink(AcidUtils.Operation.NOT_ACID, true, 0).getConf().clone();
+    desc.setLinkedFileSink(true);
+    desc.setDirName(new Path(desc.getDirName(), 
AbstractFileMergeOperator.UNION_SUDBIR_PREFIX + "0"));
+    JobConf jobConf = new JobConf(jc);
+    jobConf.set("hive.execution.engine", "tez");
+    jobConf.set("mapred.task.id", "000000_0");
+    FileSinkOperator op1 = (FileSinkOperator)OperatorFactory.get(new 
CompilationOpContext(), FileSinkDesc.class);
+    op1.setConf(desc);
+    op1.initialize(jobConf, new ObjectInspector[]{inspector});
+
+    JobConf jobConf2 = new JobConf(jobConf);
+    jobConf2.set("mapred.task.id", "000000_1");
+    FileSinkOperator speculative = (FileSinkOperator)OperatorFactory.get(
+        new CompilationOpContext(), FileSinkDesc.class);
+    speculative.setConf(desc);
+    speculative.initialize(jobConf2, new ObjectInspector[]{inspector});
+
+    for (Object r : rows) {
+      op1.process(r, 0);
+      speculative.process(r, 0);
+    }
+
+    op1.close(false);
+    // speculative task also ends successfully
+    speculative.close(false);
+    Path[] paths = findFilesInBasePath();
+    List<Path> mondays = Arrays.stream(paths)
+        .filter(path -> 
path.getParent().toString().endsWith("partval=Monday/HIVE_UNION_SUBDIR_0"))
+        .collect(Collectors.toList());
+    Assert.assertTrue(mondays.size() == 2);
+    Set<String> fileNames = new HashSet<>();
+    fileNames.add(mondays.get(0).getName());
+    fileNames.add(mondays.get(1).getName());
+    Assert.assertTrue(fileNames.contains("000000_1") && 
fileNames.contains("000000_0"));

Review Comment:
   please elaborate on this assert for useful assertion error messages, like:
   ```
       Assert.assertTrue("000000_1 file is expected", fileNames.contains("000000_1"));
       Assert.assertTrue("000000_0 file is expected", fileNames.contains("000000_0"));
   
   ```



##########
ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java:
##########
@@ -162,6 +166,59 @@ public void testNonAcidDynamicPartitioning() throws 
Exception {
     confirmOutput(DataFormat.WITH_PARTITION_VALUE);
   }
 
+  @Test
+  public void testNonAcidRemoveDuplicate() throws Exception {
+    setBasePath("writeDuplicate");
+    setupData(DataFormat.WITH_PARTITION_VALUE);
+
+    FileSinkDesc desc = (FileSinkDesc) 
getFileSink(AcidUtils.Operation.NOT_ACID, true, 0).getConf().clone();
+    desc.setLinkedFileSink(true);
+    desc.setDirName(new Path(desc.getDirName(), 
AbstractFileMergeOperator.UNION_SUDBIR_PREFIX + "0"));
+    JobConf jobConf = new JobConf(jc);
+    jobConf.set("hive.execution.engine", "tez");
+    jobConf.set("mapred.task.id", "000000_0");
+    FileSinkOperator op1 = (FileSinkOperator)OperatorFactory.get(new 
CompilationOpContext(), FileSinkDesc.class);
+    op1.setConf(desc);
+    op1.initialize(jobConf, new ObjectInspector[]{inspector});
+
+    JobConf jobConf2 = new JobConf(jobConf);
+    jobConf2.set("mapred.task.id", "000000_1");
+    FileSinkOperator speculative = (FileSinkOperator)OperatorFactory.get(
+        new CompilationOpContext(), FileSinkDesc.class);
+    speculative.setConf(desc);
+    speculative.initialize(jobConf2, new ObjectInspector[]{inspector});
+
+    for (Object r : rows) {
+      op1.process(r, 0);
+      speculative.process(r, 0);
+    }
+
+    op1.close(false);
+    // speculative task also ends successfully
+    speculative.close(false);
+    Path[] paths = findFilesInBasePath();
+    List<Path> mondays = Arrays.stream(paths)
+        .filter(path -> 
path.getParent().toString().endsWith("partval=Monday/HIVE_UNION_SUBDIR_0"))
+        .collect(Collectors.toList());
+    Assert.assertTrue(mondays.size() == 2);
+    Set<String> fileNames = new HashSet<>();
+    fileNames.add(mondays.get(0).getName());
+    fileNames.add(mondays.get(1).getName());
+    Assert.assertTrue(fileNames.contains("000000_1") && 
fileNames.contains("000000_0"));

Review Comment:
   please elaborate on this assert for useful assertion error messages like:
   ```
       Assert.assertTrue("000000_1 file is expected", fileNames.contains("000000_1"));
       Assert.assertTrue("000000_0 file is expected", fileNames.contains("000000_0"));
   
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org
For additional commands, e-mail: gitbox-h...@hive.apache.org

Reply via email to