sanpwc commented on code in PR #4043:
URL: https://github.com/apache/ignite-3/pull/4043#discussion_r1665808093
##########
modules/partition-replicator/src/main/java/org/apache/ignite/internal/partition/replicator/PartitionReplicaLifecycleManager.java:
##########
@@ -203,22 +225,115 @@ public CompletableFuture<Void>
startAsync(ComponentContext componentContext) {
inBusyLock(busyLock, () ->
onCreateZone(parameters).thenApply((ignored) -> false))
);
- return nullCompletedFuture();
+ return allOf(processZonesOnStart, processAssignmentsFuture);
+ }
+
+ private CompletableFuture<Void> processZonesOnStart(long recoveryRevision,
@Nullable HybridTimestamp lwm) {
+ int earliestCatalogVersion =
catalogMgr.activeCatalogVersion(hybridTimestampToLong(lwm));
+ int latestCatalogVersion = catalogMgr.latestCatalogVersion();
+
+ var startedZones = new IntOpenHashSet();
+ var startZoneFutures = new ArrayList<CompletableFuture<?>>();
+
+ for (int ver = latestCatalogVersion; ver >= earliestCatalogVersion;
ver--) {
+ int ver0 = ver;
+ catalogMgr.zones(ver).stream()
+ .filter(tbl -> startedZones.add(tbl.id()))
+ .forEach(zoneDescriptor -> startZoneFutures.add(
+
calculateZoneAssignmentsAndCreateReplicationNodes(recoveryRevision, ver0,
zoneDescriptor)));
+ }
+
+ return allOf(startZoneFutures.toArray(CompletableFuture[]::new))
+ .whenComplete((unused, throwable) -> {
+ if (throwable != null) {
+ LOG.error("Error starting zones", throwable);
+ } else {
+ LOG.debug("Zones started successfully");
+ }
+ });
+ }
+
+ private CompletableFuture<Void> processAssignmentsOnRecovery(long
recoveryRevision) {
+ var stableAssignmentsPrefix = new ByteArray(STABLE_ASSIGNMENTS_PREFIX);
+ var pendingAssignmentsPrefix = new
ByteArray(PENDING_ASSIGNMENTS_PREFIX);
+
+ CompletableFuture<Void> stableFuture = handleAssignmentsOnRecovery(
+ stableAssignmentsPrefix,
+ recoveryRevision,
+ (entry, rev) -> handleChangeStableAssignmentEvent(entry, rev,
true),
+ "stable"
+ );
+
+ CompletableFuture<Void> pendingFuture = handleAssignmentsOnRecovery(
+ pendingAssignmentsPrefix,
+ recoveryRevision,
+ (entry, rev) -> handleChangePendingAssignmentEvent(entry, rev,
true),
+ "pending"
+ );
+
+ return allOf(stableFuture, pendingFuture);
+ }
+
+ private CompletableFuture<Void> handleAssignmentsOnRecovery(
+ ByteArray prefix,
+ long revision,
+ BiFunction<Entry, Long, CompletableFuture<Void>>
assignmentsEventHandler,
+ String assignmentsType
+ ) {
+ try (Cursor<Entry> cursor = metaStorageMgr.prefixLocally(prefix,
revision)) {
+ CompletableFuture<?>[] futures = cursor.stream()
+ .map(entry -> {
+ if (LOG.isInfoEnabled()) {
+ LOG.info(
+ "Missed {} assignments for key '{}' discovered, performing recovery",
+ assignmentsType,
+ new String(entry.key(), UTF_8)
+ );
+ }
+
+ return assignmentsEventHandler.apply(entry, revision);
+ })
+ .toArray(CompletableFuture[]::new);
+
+ return allOf(futures)
+ // Simply log any errors, we don't want to block watch processing.
+ .exceptionally(e -> {
+ LOG.error("Error when performing assignments recovery", e);
+
+ return null;
Review Comment:
Why? Do we really want recovery to proceed almost silently when an exception occurs, i.e. only log the error and swallow it?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]