zhangyue19921010 commented on a change in pull request #3765:
URL: https://github.com/apache/hudi/pull/3765#discussion_r727813979
##########
File path:
hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieClusteringJob.java
##########
@@ -215,12 +221,26 @@ private int doCluster(JavaSparkContext jsc) throws
Exception {
return client.scheduleClustering(Option.empty());
}
- public int doScheduleAndCluster(JavaSparkContext jsc) throws Exception {
+ private int doScheduleAndCluster(JavaSparkContext jsc) throws Exception {
LOG.info("Step 1: Do schedule");
String schemaStr = getSchemaFromLatestInstant();
try (SparkRDDWriteClient<HoodieRecordPayload> client =
UtilHelpers.createHoodieClient(jsc, cfg.basePath, schemaStr, cfg.parallelism,
Option.empty(), props)) {
+ Option<String> instantTime;
+
+ if (cfg.retryLastFailedClusteringJob) {
+ HoodieSparkTable<HoodieRecordPayload> table =
HoodieSparkTable.create(client.getConfig(), client.getEngineContext());
+ HoodieTimeline inflightHoodieTimeline =
table.getActiveTimeline().filterPendingReplaceTimeline().filterInflights();
+ if (inflightHoodieTimeline.empty()) {
+ instantTime = doSchedule(client);
+ } else {
+ // If there is a failed clustering, reuse the failed clustering's
instant time to trigger the next clustering action, which will roll back
the failed attempt and re-run the clustering.
+ LOG.info("Find failed clustering plan : " +
inflightHoodieTimeline.lastInstant().get() + "; Will rollback and re-trigger
this failed clustering plan.");
Review comment:
Thanks for your review! Changed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]