satishkotha commented on a change in pull request #2048:
URL: https://github.com/apache/hudi/pull/2048#discussion_r488346156



##########
File path: 
hudi-client/src/test/java/org/apache/hudi/client/TestHoodieClientOnCopyOnWriteStorage.java
##########
@@ -880,6 +880,89 @@ public void testDeletesWithDeleteApi() throws Exception {
     testDeletes(client, updateBatch3.getRight(), 10, file1, "007", 140, 
keysSoFar);
   }
 
+  /**
+   * Test scenario of writing more file groups than existing number of file 
groups in partition.
+   */
+  @Test
+  public void testInsertOverwritePartitionHandlingWithMoreRecords() throws 
Exception {
+    verifyInsertOverwritePartitionHandling(1000, 3000);
+  }
+
+  /**
+   * Test scenario of writing fewer file groups than existing number of file 
groups in partition.
+   */
+  @Test
+  public void testInsertOverwritePartitionHandlingWithFewerRecords() throws 
Exception {
+    verifyInsertOverwritePartitionHandling(3000, 1000);
+  }
+
+  /**
+   * Test scenario of writing a similar number of file groups in partition.
+   */
+  @Test
+  public void testInsertOverwritePartitionHandlingWithSimilarNumberOfRecords() 
throws Exception {
+    verifyInsertOverwritePartitionHandling(3000, 3000);
+  }
+
+  /**
+   *  1) Do write1 (upsert) with 'batch1RecordsCount' number of records.
+   *  2) Do write2 (insert overwrite) with 'batch2RecordsCount' number of 
records.
+   *
+   *  Verify that all records in step1 are overwritten
+   */
+  private void verifyInsertOverwritePartitionHandling(int batch1RecordsCount, 
int batch2RecordsCount) throws Exception {
+    final String testPartitionPath = "americas";
+    HoodieWriteConfig config = getSmallInsertWriteConfig(2000);
+    HoodieWriteClient client = getHoodieWriteClient(config, false);
+    dataGen = new HoodieTestDataGenerator(new String[] {testPartitionPath});
+
+    // Do Inserts
+    String commitTime1 = "001";
+    client.startCommitWithTime(commitTime1);
+    List<HoodieRecord> inserts1 = dataGen.generateInserts(commitTime1, 
batch1RecordsCount);
+    JavaRDD<HoodieRecord> insertRecordsRDD1 = jsc.parallelize(inserts1, 2);
+    List<WriteStatus> statuses = client.upsert(insertRecordsRDD1, 
commitTime1).collect();
+    assertNoWriteErrors(statuses);
+    Set<String> batch1Buckets = statuses.stream().map(s -> 
s.getFileId()).collect(Collectors.toSet());
+    verifyParquetFileData(commitTime1, inserts1, statuses);
+
+    // Do Insert Overwrite
+    String commitTime2 = "002";
+    client.startCommitWithTime(commitTime2, 
HoodieTimeline.REPLACE_COMMIT_ACTION);
+    List<HoodieRecord> inserts2 = dataGen.generateInserts(commitTime2, 
batch2RecordsCount);
+    List<HoodieRecord> insertsAndUpdates2 = new ArrayList<>();
+    insertsAndUpdates2.addAll(inserts2);
+    JavaRDD<HoodieRecord> insertAndUpdatesRDD2 = 
jsc.parallelize(insertsAndUpdates2, 2);
+    statuses = client.insertOverwrite(insertAndUpdatesRDD2, 
commitTime2).collect();
+    assertNoWriteErrors(statuses);
+    Set<String> replacedBuckets = statuses.stream().filter(s -> 
s.isReplacedFileId())
+        .map(s -> s.getFileId()).collect(Collectors.toSet());
+    assertEquals(batch1Buckets, replacedBuckets);
+    List<WriteStatus> newBuckets = statuses.stream().filter(s -> 
!(s.isReplacedFileId()))
+        .collect(Collectors.toList());
+    verifyParquetFileData(commitTime2, inserts2, newBuckets);
+  }
+
+  /**
+   * Verify data in parquet files matches expected records and commit time.
+   */
+  private void verifyParquetFileData(String commitTime, List<HoodieRecord> 
expectedRecords, List<WriteStatus> allStatus) {

Review comment:
       Renamed it for now. I'll look into whether there are any other helpers 
for doing this.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to