danny0405 commented on code in PR #8673:
URL: https://github.com/apache/hudi/pull/8673#discussion_r1360495831
##########
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperatorCoordinator.java:
##########
@@ -422,26 +424,37 @@ private void initInstant(String instant) {
LOG.info("Recommit instant {}", instant);
commitInstant(instant);
}
- // starts a new instant
- startInstant();
+ String pendingInstant = ckpMetadata.lastPendingInstant();
+ if (pendingInstant == null) {
+ // starts a new instant
+ startInstant();
+ } else { // reuse pending instant if exists, depend on [HUDI-5223]
+ LOG.info("Reuse pending instant " + pendingInstant);
+ }
// upgrade downgrade
this.writeClient.upgradeDowngrade(this.instant, this.metaClient);
}, "initialize instant %s", instant);
}
- private void handleBootstrapEvent(WriteMetadataEvent event) {
+ private void handleBootstrapEvent(WriteResultEvent writeEvent) {
+ WriteMetadataEvent event = writeEvent.getWriteMetadataEvent();
this.eventBuffer[event.getTaskID()] = event;
if (Arrays.stream(eventBuffer).allMatch(evt -> evt != null &&
evt.isBootstrap())) {
- // start to initialize the instant.
- final String instant = Arrays.stream(eventBuffer)
- .filter(evt -> evt.getWriteStatuses().size() > 0)
- .findFirst().map(WriteMetadataEvent::getInstantTime)
- .orElse(WriteMetadataEvent.BOOTSTRAP_INSTANT);
- initInstant(instant);
+ if (Arrays.stream(eventBuffer).allMatch(e -> !Objects.equals(instant,
WriteMetadataEvent.BOOTSTRAP_INSTANT) &&
+ (e.getCurInstant() == null ||
HoodieTimeline.compareTimestamps(e.getCurInstant(), HoodieTimeline.LESSER_THAN,
instant)))) {
+ LOG.info("Current instant " + instant + " is new, reuse current
instant.");
+ } else {
Review Comment:
This check is risky because an incorrect instant may leave behind corrupt
files that cannot be cleaned up by the finalization of the pre-commit.
```java
HoodieTimeline.compareTimestamps(e.getCurInstant(),
HoodieTimeline.LESSER_THAN, instant)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]