codope commented on code in PR #12711:
URL: https://github.com/apache/hudi/pull/12711#discussion_r1929791055


##########
hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java:
##########
@@ -1062,6 +1077,109 @@ public void testHoodieIndexer(HoodieRecordType 
recordType) throws Exception {
     UtilitiesTestBase.Helpers.deleteFileFromDfs(fs, tableBasePath);
   }
 
+  @ParameterizedTest
+  @EnumSource(value = HoodieRecordType.class, names = {"AVRO", "SPARK"})
+  public void testHoodieIndexerExecutionAfterCommit(HoodieRecordType 
recordType) throws Exception {
+    String tableBasePath = basePath + "/asyncindexer";
+    HoodieDeltaStreamer ds = initialHoodieDeltaStreamer(tableBasePath, 1000, 
"false", recordType, WriteOperationType.UPSERT,
+        Collections.singleton(HoodieIndexConfig.INDEX_TYPE.key() + 
"=GLOBAL_SIMPLE"));
+
+    deltaStreamerTestRunner(ds, (r) -> {
+      TestHelpers.assertAtLeastNCommits(2, tableBasePath);
+
+      Option<String> scheduleIndexInstantTime;
+      try {
+        HoodieIndexer scheduleIndexingJob = new HoodieIndexer(jsc,
+            buildIndexerConfig(tableBasePath, ds.getConfig().targetTableName, 
null, UtilHelpers.SCHEDULE, "RECORD_INDEX"));
+        scheduleIndexInstantTime = scheduleIndexingJob.doSchedule();
+      } catch (Exception e) {
+        LOG.info("Schedule indexing failed", e);
+        return false;
+      }
+      if (scheduleIndexInstantTime.isPresent()) {
+        TestHelpers.assertPendingIndexCommit(tableBasePath);
+        LOG.info("Schedule indexing success, now build index with instant time 
" + scheduleIndexInstantTime.get());
+        TestHelpers.waitFor(() -> {
+          HoodieTableMetaClient metaClient = 
HoodieTestUtils.createMetaClient(storage, tableBasePath);
+          HoodieTimeline pendingCommitsTimeline = 
metaClient.getActiveTimeline().getCommitsTimeline().filterInflightsAndRequested();
+          return !pendingCommitsTimeline.empty();
+        });
+        HoodieIndexer runIndexingJob = new HoodieIndexer(jsc,
+            buildIndexerConfig(tableBasePath, ds.getConfig().targetTableName, 
scheduleIndexInstantTime.get(), UtilHelpers.EXECUTE, "RECORD_INDEX",
+                
Collections.singletonList(HoodieMetadataConfig.RECORD_INDEX_ENABLE_PROP.key() + 
"=true")));
+        runIndexingJob.start(0);
+        LOG.info("Metadata indexing success");
+        TestHelpers.assertCompletedIndexCommit(tableBasePath);
+      } else {
+        LOG.warn("Metadata indexing failed");
+      }
+      return true;
+    });
+
+    validateRecordIndex(tableBasePath);
+    UtilitiesTestBase.Helpers.deleteFileFromDfs(fs, tableBasePath);
+  }
+
+  private static void validateRecordIndex(String tableBasePath) {
+    HoodieMetadataTableValidator.Config config = new 
HoodieMetadataTableValidator.Config();
+    config.basePath = tableBasePath;
+    config.validateLatestFileSlices = true;
+    config.validateAllFileGroups = true;
+    config.validateRecordIndexContent = true;
+    config.validateRecordIndexCount = true;
+    HoodieMetadataTableValidator validator = new 
HoodieMetadataTableValidator(jsc, config);
+    assertTrue(validator.run());
+    assertFalse(validator.hasValidationFailure());
+    assertTrue(validator.getThrowables().isEmpty());
+  }
+
+  @ParameterizedTest
+  @EnumSource(value = HoodieRecordType.class, names = {"AVRO", "SPARK"})
+  public void testHoodieIndexerExecutionAfterClustering(HoodieRecordType 
recordType) throws Exception {

Review Comment:
   Can you please add a comment in the code itself describing the sequence of 
commits? From reading the test, it looks like there is one commit, followed by 
an interleaved replacecommit and indexing commit. Please document both tests 
as thoroughly as possible.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to