denis-chudov commented on code in PR #7379:
URL: https://github.com/apache/ignite-3/pull/7379#discussion_r2676113823


##########
modules/replicator/src/main/java/org/apache/ignite/internal/replicator/PlacementDriverMessageProcessor.java:
##########
@@ -259,18 +259,51 @@ private CompletableFuture<LeaseGrantedMessageResponse> 
proposeLeaseRedirect(Inte
      * @return Future that is completed when local storage catches up the 
index that is actual for leader on the moment of request.
      */
     private CompletableFuture<Void> waitForActualState(HybridTimestamp 
startTime, long expirationTime) {
-        LOG.info("Waiting for actual storage state, group=" + groupId);
+        long timeout = expirationTime - currentTimeMillis();
+
+        LOG.info("Waiting for actual storage state [groupId={}, 
leaseStartTime={},"
+                        + " expirationTime={}, timeoutMs={}]",
+                groupId, startTime, expirationTime, timeout);
 
         replicaReservationClosure.accept(groupId, startTime);
 
-        long timeout = expirationTime - currentTimeMillis();
         if (timeout <= 0) {
-            return failedFuture(new TimeoutException());
+            LOG.warn("Timeout already expired before starting wait 
[groupId={}, expirationTime={}, currentTime={}]",
+                    groupId, expirationTime, currentTimeMillis());
+            return failedFuture(new TimeoutException("Timeout expired before 
starting wait"));

Review Comment:
   ```suggestion
               return failedFuture(new TimeoutException("Timeout expired before 
waiting started"));
   ```



##########
modules/replicator/src/main/java/org/apache/ignite/internal/replicator/PlacementDriverMessageProcessor.java:
##########
@@ -259,18 +259,51 @@ private CompletableFuture<LeaseGrantedMessageResponse> 
proposeLeaseRedirect(Inte
      * @return Future that is completed when local storage catches up the 
index that is actual for leader on the moment of request.
      */
     private CompletableFuture<Void> waitForActualState(HybridTimestamp 
startTime, long expirationTime) {
-        LOG.info("Waiting for actual storage state, group=" + groupId);
+        long timeout = expirationTime - currentTimeMillis();
+
+        LOG.info("Waiting for actual storage state [groupId={}, 
leaseStartTime={},"
+                        + " expirationTime={}, timeoutMs={}]",
+                groupId, startTime, expirationTime, timeout);
 
         replicaReservationClosure.accept(groupId, startTime);
 
-        long timeout = expirationTime - currentTimeMillis();
         if (timeout <= 0) {
-            return failedFuture(new TimeoutException());
+            LOG.warn("Timeout already expired before starting wait 
[groupId={}, expirationTime={}, currentTime={}]",

Review Comment:
   ```suggestion
               LOG.warn("Timeout already expired before waiting started 
[groupId={}, expirationTime={}, currentTime={}]",
   ```



##########
modules/replicator/src/main/java/org/apache/ignite/internal/replicator/PlacementDriverMessageProcessor.java:
##########
@@ -259,18 +259,51 @@ private CompletableFuture<LeaseGrantedMessageResponse> 
proposeLeaseRedirect(Inte
      * @return Future that is completed when local storage catches up the 
index that is actual for leader on the moment of request.
      */
     private CompletableFuture<Void> waitForActualState(HybridTimestamp 
startTime, long expirationTime) {
-        LOG.info("Waiting for actual storage state, group=" + groupId);
+        long timeout = expirationTime - currentTimeMillis();
+
+        LOG.info("Waiting for actual storage state [groupId={}, 
leaseStartTime={},"
+                        + " expirationTime={}, timeoutMs={}]",
+                groupId, startTime, expirationTime, timeout);
 
         replicaReservationClosure.accept(groupId, startTime);
 
-        long timeout = expirationTime - currentTimeMillis();
         if (timeout <= 0) {
-            return failedFuture(new TimeoutException());
+            LOG.warn("Timeout already expired before starting wait 
[groupId={}, expirationTime={}, currentTime={}]",
+                    groupId, expirationTime, currentTimeMillis());
+            return failedFuture(new TimeoutException("Timeout expired before 
starting wait"));
         }
 
+        long readIndexStartTime = currentTimeMillis();
+
         return retryOperationUntilSuccess(raftClient::readIndex, e -> 
currentTimeMillis() > expirationTime, executor)
-                .orTimeout(timeout, TimeUnit.MILLISECONDS)
-                .thenCompose(storageIndexTracker::waitFor);
+                .whenComplete((raftIndex, readIndexError) -> {
+                    long readIndexDuration = currentTimeMillis() - 
readIndexStartTime;
+
+                    if (readIndexError != null) {
+                        LOG.warn("Failed to read index from raft leader 
[groupId={}, durationMs={}, error={}]",
+                                groupId, readIndexDuration, readIndexError);
+                    }
+                })
+                .thenCompose(raftIndex -> {
+                    // Recalculate remaining time after readIndex completes.
+                    long storageIndexUpdateStartTime = currentTimeMillis();
+                    long remainingTime = expirationTime - 
storageIndexUpdateStartTime;
+                    if (remainingTime <= 0) {
+                        return failedFuture(new TimeoutException("No time left 
for storage wait"));
+                    }
+
+                    return storageIndexTracker.waitFor(raftIndex)
+                            .orTimeout(remainingTime, TimeUnit.MILLISECONDS)
+                            .whenComplete((v, storageIndexTrackerError) -> {
+                                long trackingDuration = currentTimeMillis() - 
storageIndexUpdateStartTime;
+
+                                if (storageIndexTrackerError != null) {
+                                    LOG.warn("Failed to wait for storage index 
to reach raft leader"

Review Comment:
   Same about stack trace.
   
   Let's also add some logs for errors in 
`sendPrimaryReplicaChangeToReplicationGroup`



##########
modules/replicator/src/main/java/org/apache/ignite/internal/replicator/PlacementDriverMessageProcessor.java:
##########
@@ -259,18 +259,51 @@ private CompletableFuture<LeaseGrantedMessageResponse> 
proposeLeaseRedirect(Inte
      * @return Future that is completed when local storage catches up the 
index that is actual for leader on the moment of request.
      */
     private CompletableFuture<Void> waitForActualState(HybridTimestamp 
startTime, long expirationTime) {
-        LOG.info("Waiting for actual storage state, group=" + groupId);
+        long timeout = expirationTime - currentTimeMillis();
+
+        LOG.info("Waiting for actual storage state [groupId={}, 
leaseStartTime={},"
+                        + " expirationTime={}, timeoutMs={}]",
+                groupId, startTime, expirationTime, timeout);
 
         replicaReservationClosure.accept(groupId, startTime);
 
-        long timeout = expirationTime - currentTimeMillis();
         if (timeout <= 0) {
-            return failedFuture(new TimeoutException());
+            LOG.warn("Timeout already expired before starting wait 
[groupId={}, expirationTime={}, currentTime={}]",
+                    groupId, expirationTime, currentTimeMillis());
+            return failedFuture(new TimeoutException("Timeout expired before 
starting wait"));
         }
 
+        long readIndexStartTime = currentTimeMillis();
+
         return retryOperationUntilSuccess(raftClient::readIndex, e -> 
currentTimeMillis() > expirationTime, executor)

Review Comment:
   let's also add to condition in `retryOperationUntilSuccess`: if the future 
is already completed (for example, exceptionally due to timeout) then also stop 
retrying



##########
modules/replicator/src/main/java/org/apache/ignite/internal/replicator/PlacementDriverMessageProcessor.java:
##########
@@ -259,18 +259,51 @@ private CompletableFuture<LeaseGrantedMessageResponse> 
proposeLeaseRedirect(Inte
      * @return Future that is completed when local storage catches up the 
index that is actual for leader on the moment of request.
      */
     private CompletableFuture<Void> waitForActualState(HybridTimestamp 
startTime, long expirationTime) {
-        LOG.info("Waiting for actual storage state, group=" + groupId);
+        long timeout = expirationTime - currentTimeMillis();
+
+        LOG.info("Waiting for actual storage state [groupId={}, 
leaseStartTime={},"
+                        + " expirationTime={}, timeoutMs={}]",
+                groupId, startTime, expirationTime, timeout);
 
         replicaReservationClosure.accept(groupId, startTime);
 
-        long timeout = expirationTime - currentTimeMillis();
         if (timeout <= 0) {
-            return failedFuture(new TimeoutException());
+            LOG.warn("Timeout already expired before starting wait 
[groupId={}, expirationTime={}, currentTime={}]",
+                    groupId, expirationTime, currentTimeMillis());
+            return failedFuture(new TimeoutException("Timeout expired before 
starting wait"));
         }
 
+        long readIndexStartTime = currentTimeMillis();
+
         return retryOperationUntilSuccess(raftClient::readIndex, e -> 
currentTimeMillis() > expirationTime, executor)
-                .orTimeout(timeout, TimeUnit.MILLISECONDS)
-                .thenCompose(storageIndexTracker::waitFor);
+                .whenComplete((raftIndex, readIndexError) -> {
+                    long readIndexDuration = currentTimeMillis() - 
readIndexStartTime;
+
+                    if (readIndexError != null) {
+                        LOG.warn("Failed to read index from raft leader 
[groupId={}, durationMs={}, error={}]",
+                                groupId, readIndexDuration, readIndexError);
+                    }
+                })
+                .thenCompose(raftIndex -> {

Review Comment:
   I think you can do both in single closure using `.handle()`



##########
modules/replicator/src/main/java/org/apache/ignite/internal/replicator/PlacementDriverMessageProcessor.java:
##########
@@ -259,18 +259,51 @@ private CompletableFuture<LeaseGrantedMessageResponse> 
proposeLeaseRedirect(Inte
      * @return Future that is completed when local storage catches up the 
index that is actual for leader on the moment of request.
      */
     private CompletableFuture<Void> waitForActualState(HybridTimestamp 
startTime, long expirationTime) {
-        LOG.info("Waiting for actual storage state, group=" + groupId);
+        long timeout = expirationTime - currentTimeMillis();
+
+        LOG.info("Waiting for actual storage state [groupId={}, 
leaseStartTime={},"
+                        + " expirationTime={}, timeoutMs={}]",
+                groupId, startTime, expirationTime, timeout);
 
         replicaReservationClosure.accept(groupId, startTime);
 
-        long timeout = expirationTime - currentTimeMillis();
         if (timeout <= 0) {
-            return failedFuture(new TimeoutException());
+            LOG.warn("Timeout already expired before starting wait 
[groupId={}, expirationTime={}, currentTime={}]",
+                    groupId, expirationTime, currentTimeMillis());
+            return failedFuture(new TimeoutException("Timeout expired before 
starting wait"));
         }
 
+        long readIndexStartTime = currentTimeMillis();
+
         return retryOperationUntilSuccess(raftClient::readIndex, e -> 
currentTimeMillis() > expirationTime, executor)
-                .orTimeout(timeout, TimeUnit.MILLISECONDS)
-                .thenCompose(storageIndexTracker::waitFor);
+                .whenComplete((raftIndex, readIndexError) -> {
+                    long readIndexDuration = currentTimeMillis() - 
readIndexStartTime;
+
+                    if (readIndexError != null) {
+                        LOG.warn("Failed to read index from raft leader 
[groupId={}, durationMs={}, error={}]",

Review Comment:
   pls add `readIndexError` as `Throwable`-type argument, so that `warn` could 
print the throwable stack trace



##########
modules/replicator/src/main/java/org/apache/ignite/internal/replicator/PlacementDriverMessageProcessor.java:
##########
@@ -259,18 +259,51 @@ private CompletableFuture<LeaseGrantedMessageResponse> 
proposeLeaseRedirect(Inte
      * @return Future that is completed when local storage catches up the 
index that is actual for leader on the moment of request.
      */
     private CompletableFuture<Void> waitForActualState(HybridTimestamp 
startTime, long expirationTime) {
-        LOG.info("Waiting for actual storage state, group=" + groupId);
+        long timeout = expirationTime - currentTimeMillis();
+
+        LOG.info("Waiting for actual storage state [groupId={}, 
leaseStartTime={},"
+                        + " expirationTime={}, timeoutMs={}]",
+                groupId, startTime, expirationTime, timeout);
 
         replicaReservationClosure.accept(groupId, startTime);
 
-        long timeout = expirationTime - currentTimeMillis();
         if (timeout <= 0) {
-            return failedFuture(new TimeoutException());
+            LOG.warn("Timeout already expired before starting wait 
[groupId={}, expirationTime={}, currentTime={}]",
+                    groupId, expirationTime, currentTimeMillis());
+            return failedFuture(new TimeoutException("Timeout expired before 
starting wait"));
         }
 
+        long readIndexStartTime = currentTimeMillis();
+
         return retryOperationUntilSuccess(raftClient::readIndex, e -> 
currentTimeMillis() > expirationTime, executor)
-                .orTimeout(timeout, TimeUnit.MILLISECONDS)
-                .thenCompose(storageIndexTracker::waitFor);
+                .whenComplete((raftIndex, readIndexError) -> {
+                    long readIndexDuration = currentTimeMillis() - 
readIndexStartTime;
+
+                    if (readIndexError != null) {
+                        LOG.warn("Failed to read index from raft leader 
[groupId={}, durationMs={}, error={}]",
+                                groupId, readIndexDuration, readIndexError);
+                    }
+                })
+                .thenCompose(raftIndex -> {
+                    // Recalculate remaining time after readIndex completes.
+                    long storageIndexUpdateStartTime = currentTimeMillis();
+                    long remainingTime = expirationTime - 
storageIndexUpdateStartTime;
+                    if (remainingTime <= 0) {
+                        return failedFuture(new TimeoutException("No time left 
for storage wait"));

Review Comment:
   What about wrapping both cases into some kind of `withTimeout()`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to