supratimdeka commented on a change in pull request #1226: HDDS-1610. 
applyTransaction failure should not be lost on restart.
URL: https://github.com/apache/hadoop/pull/1226#discussion_r311898006
 
 

 ##########
 File path: 
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
 ##########
 @@ -674,30 +674,54 @@ public void notifyIndexUpdate(long term, long index) {
       if (cmdType == Type.WriteChunk || cmdType ==Type.PutSmallFile) {
         builder.setCreateContainerSet(createContainerSet);
       }
+      CompletableFuture<Message> applyTransactionFuture =
+          new CompletableFuture<>();
       // Ensure the command gets executed in a separate thread than
       // stateMachineUpdater thread which is calling applyTransaction here.
-      CompletableFuture<Message> future = CompletableFuture
-          .supplyAsync(() -> runCommand(requestProto, builder.build()),
+      CompletableFuture<ContainerCommandResponseProto> future =
+          CompletableFuture.supplyAsync(
+              () -> runCommandGetResponse(requestProto, builder.build()),
               getCommandExecutor(requestProto));
-
-      future.thenAccept(m -> {
+      future.thenApply(r -> {
         if (trx.getServerRole() == RaftPeerRole.LEADER) {
           long startTime = (long) trx.getStateMachineContext();
           metrics.incPipelineLatency(cmdType,
               Time.monotonicNowNanos() - startTime);
         }
-
-        final Long previous =
-            applyTransactionCompletionMap
-                .put(index, trx.getLogEntry().getTerm());
-        Preconditions.checkState(previous == null);
-        if (cmdType == Type.WriteChunk || cmdType == Type.PutSmallFile) {
-          metrics.incNumBytesCommittedCount(
+        if (r.getResult() != ContainerProtos.Result.SUCCESS) {
+          StorageContainerException sce =
+              new StorageContainerException(r.getMessage(), r.getResult());
+          LOG.error(gid + ": ApplyTransaction failed: cmd " + r.getCmdType()
+              + " logIndex " + index + " Error message: " + r.getMessage()
+              + " Container Result: " + r.getResult());
+          metrics.incNumApplyTransactionsFails();
+          ratisServer.handleApplyTransactionFailure(gid, trx.getServerRole());
+          // Since the applyTransaction now is completed exceptionally,
+          // before any further snapshot is taken , the exception will be
+          // caught in stateMachineUpdater in Ratis and ratis server will
+          // shutdown.
+          applyTransactionFuture.completeExceptionally(sce);
+        } else {
+          metrics.incNumBytesWrittenCount(
               requestProto.getWriteChunk().getChunkData().getLen());
+          LOG.debug(gid + ": ApplyTransaction completed: cmd " + r.getCmdType()
+              + " logIndex " + index + " Error message: " + r.getMessage()
+              + " Container Result: " + r.getResult());
+          applyTransactionFuture.complete(r::toByteString);
+          if (cmdType == Type.WriteChunk || cmdType == Type.PutSmallFile) {
+            metrics.incNumBytesCommittedCount(
+                requestProto.getWriteChunk().getChunkData().getLen());
+          }
         }
+
+        final Long previous = applyTransactionCompletionMap
+            .put(index, trx.getLogEntry().getTerm());
+        Preconditions.checkState(previous == null);
         updateLastApplied();
-      }).whenComplete((r, t) -> applyTransactionSemaphore.release());
-      return future;
+        applyTransactionSemaphore.release();
 
 Review comment:
   I think we should keep the whenComplete() stage at the end.
   Releasing the semaphore from a whenComplete() stage guarantees that the
   semaphore is released even if the processing inside the thenApply() stage
   throws an exception. This seems to me to be good practice.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to