This is an automated email from the ASF dual-hosted git repository.

duong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 549da73d0f HDDS-9425. Breakdown metrics for OM writes (#5421)
549da73d0f is described below

commit 549da73d0f836a6adae3e5adebf9d5af9ee766ed
Author: Duong Nguyen <[email protected]>
AuthorDate: Fri Oct 20 16:19:52 2023 -0700

    HDDS-9425. Breakdown metrics for OM writes (#5421)
---
 .../java/org/apache/hadoop/util/MetricUtil.java    |  2 +-
 .../hadoop/ozone/om/OMPerformanceMetrics.java      | 50 ++++++++++++++++++++++
 .../ozone/om/ratis/OzoneManagerRatisServer.java    | 44 ++++++++++++++-----
 ...OzoneManagerProtocolServerSideTranslatorPB.java | 29 +++++++++----
 .../protocolPB/OzoneManagerRequestHandler.java     | 13 +++---
 5 files changed, 112 insertions(+), 26 deletions(-)

diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/util/MetricUtil.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/util/MetricUtil.java
index 5f791c17c4..879f8ed0cc 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/util/MetricUtil.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/util/MetricUtil.java
@@ -31,7 +31,7 @@ public final class MetricUtil {
   private MetricUtil() {
   }
 
-  public static <T, E extends IOException> T captureLatencyNs(
+  public static <T, E extends Exception> T captureLatencyNs(
       MutableRate metric,
       CheckedSupplier<T, E> block) throws E {
     long start = Time.monotonicNowNanos();
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMPerformanceMetrics.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMPerformanceMetrics.java
index 5b0829a088..25223c502c 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMPerformanceMetrics.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMPerformanceMetrics.java
@@ -92,6 +92,28 @@ public class OMPerformanceMetrics {
   @Metric(about = "listKeys latency in nanoseconds")
   private MutableRate listKeysLatencyNs;
 
+  @Metric(about = "Validate request latency in nano seconds")
+  private MutableRate validateRequestLatencyNs;
+
+  @Metric(about = "Validate response latency in nano seconds")
+  private MutableRate validateResponseLatencyNs;
+
+  @Metric(about = "PreExecute latency in nano seconds")
+  private MutableRate preExecuteLatencyNs;
+
+  @Metric(about = "Ratis latency in nano seconds")
+  private MutableRate submitToRatisLatencyNs;
+
+  @Metric(about = "Convert om request to ratis request nano seconds")
+  private MutableRate createRatisRequestLatencyNs;
+
+  @Metric(about = "Convert ratis response to om response nano seconds")
+  private MutableRate createOmResoonseLatencyNs;
+
+  @Metric(about = "Ratis local command execution latency in nano seconds")
+  private MutableRate validateAndUpdateCacneLatencyNs;
+
+
   public void addLookupLatency(long latencyInNs) {
     lookupLatencyNs.add(latencyInNs);
   }
@@ -160,4 +182,32 @@ public class OMPerformanceMetrics {
   public void addListKeysLatencyNs(long latencyInNs) {
     listKeysLatencyNs.add(latencyInNs);
   }
+
+  public MutableRate getValidateRequestLatencyNs() {
+    return validateRequestLatencyNs;
+  }
+
+  public MutableRate getValidateResponseLatencyNs() {
+    return validateResponseLatencyNs;
+  }
+
+  public MutableRate getPreExecuteLatencyNs() {
+    return preExecuteLatencyNs;
+  }
+
+  public MutableRate getSubmitToRatisLatencyNs() {
+    return submitToRatisLatencyNs;
+  }
+
+  public MutableRate getCreateRatisRequestLatencyNs() {
+    return createRatisRequestLatencyNs;
+  }
+
+  public MutableRate getCreateOmResponseLatencyNs() {
+    return createOmResoonseLatencyNs;
+  }
+
+  public MutableRate getValidateAndUpdateCacneLatencyNs() {
+    return validateAndUpdateCacneLatencyNs;
+  }
 }
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
index 77e2599be7..712dff4073 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
@@ -50,6 +50,7 @@ import 
org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient
 import org.apache.hadoop.hdds.tracing.TracingUtil;
 import org.apache.hadoop.ipc.ProtobufRpcEngine.Server;
 import org.apache.hadoop.ozone.om.OMConfigKeys;
+import org.apache.hadoop.ozone.om.OMPerformanceMetrics;
 import org.apache.hadoop.ozone.om.OzoneManager;
 import org.apache.hadoop.ozone.om.exceptions.OMException;
 import org.apache.hadoop.ozone.om.exceptions.OMLeaderNotReadyException;
@@ -96,6 +97,7 @@ import static 
org.apache.hadoop.ipc.RpcConstants.DUMMY_CLIENT_ID;
 import static org.apache.hadoop.ipc.RpcConstants.INVALID_CALL_ID;
 import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_HA_PREFIX;
 import static 
org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils.createServerTlsConfig;
+import static org.apache.hadoop.util.MetricUtil.captureLatencyNs;
 
 /**
  * Creates a Ratis server endpoint for OM.
@@ -115,6 +117,7 @@ public final class OzoneManagerRatisServer {
   private final OzoneManager ozoneManager;
   private final OzoneManagerStateMachine omStateMachine;
   private final String ratisStorageDir;
+  private final OMPerformanceMetrics perfMetrics;
 
   private final ClientId clientId = ClientId.randomId();
   private static final AtomicLong CALL_ID_COUNTER = new AtomicLong();
@@ -174,6 +177,7 @@ public final class OzoneManagerRatisServer {
         .setParameters(parameters)
         .setStateMachine(omStateMachine)
         .build();
+    this.perfMetrics = om.getPerfMetrics();
   }
 
   /**
@@ -246,11 +250,9 @@ public final class OzoneManagerRatisServer {
     // In prepare mode, only prepare and cancel requests are allowed to go
     // through.
     if (ozoneManager.getPrepareState().requestAllowed(omRequest.getCmdType())) 
{
-      RaftClientRequest raftClientRequest =
-          createWriteRaftClientRequest(omRequest);
+      RaftClientRequest raftClientRequest = createRaftRequest(omRequest);
       RaftClientReply raftClientReply = 
submitRequestToRatis(raftClientRequest);
-
-      return processReply(omRequest, raftClientReply);
+      return createOmResponse(omRequest, raftClientReply);
     } else {
       LOG.info("Rejecting write request on OM {} because it is in prepare " +
           "mode: {}", ozoneManager.getOMNodeId(),
@@ -268,6 +270,27 @@ public final class OzoneManagerRatisServer {
     }
   }
 
+  private OMResponse createOmResponse(OMRequest omRequest,
+      RaftClientReply raftClientReply) throws ServiceException {
+    return captureLatencyNs(
+        perfMetrics.getCreateOmResponseLatencyNs(),
+        () -> createOmResponseImpl(omRequest, raftClientReply));
+  }
+
+  private RaftClientReply submitRequestToRatis(
+      RaftClientRequest raftClientRequest) throws ServiceException {
+    return captureLatencyNs(
+        perfMetrics.getSubmitToRatisLatencyNs(),
+        () -> submitRequestToRatisImpl(raftClientRequest));
+  }
+
+  private RaftClientRequest createRaftRequest(OMRequest omRequest) {
+    RaftClientRequest raftClientRequest = captureLatencyNs(
+        perfMetrics.getCreateRatisRequestLatencyNs(),
+        () -> createRaftRequestImpl(omRequest));
+    return raftClientRequest;
+  }
+
   /**
    * API used internally from OzoneManager Server when requests needs to be
    * submitted to ratis, where the crafted RaftClientRequest is passed along.
@@ -278,11 +301,12 @@ public final class OzoneManagerRatisServer {
    */
   public OMResponse submitRequest(OMRequest omRequest,
       RaftClientRequest raftClientRequest) throws ServiceException {
-    RaftClientReply raftClientReply = submitRequestToRatis(raftClientRequest);
-    return processReply(omRequest, raftClientReply);
+    RaftClientReply raftClientReply =
+        submitRequestToRatis(raftClientRequest);
+    return createOmResponse(omRequest, raftClientReply);
   }
 
-  private RaftClientReply submitRequestToRatis(
+  private RaftClientReply submitRequestToRatisImpl(
       RaftClientRequest raftClientRequest) throws ServiceException {
     try {
       return server.submitClientRequestAsync(raftClientRequest)
@@ -420,7 +444,7 @@ public final class OzoneManagerRatisServer {
    * @return RaftClientRequest - Raft Client request which is submitted to
    * ratis server.
    */
-  private RaftClientRequest createWriteRaftClientRequest(OMRequest omRequest) {
+  private RaftClientRequest createRaftRequestImpl(OMRequest omRequest) {
     if (!ozoneManager.isTestSecureOmFlag()) {
       Preconditions.checkArgument(Server.getClientId() != DUMMY_CLIENT_ID);
       Preconditions.checkArgument(Server.getCallId() != INVALID_CALL_ID);
@@ -445,8 +469,8 @@ public final class OzoneManagerRatisServer {
    * @return OMResponse - response which is returned to client.
    * @throws ServiceException
    */
-  private OMResponse processReply(OMRequest omRequest, RaftClientReply reply)
-      throws ServiceException {
+  private OMResponse createOmResponseImpl(OMRequest omRequest,
+      RaftClientReply reply) throws ServiceException {
     // NotLeader exception is thrown only when the raft server to which the
     // request is submitted is not the leader. This can happen first time
     // when client is submitting request to OM.
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
index d26cce0c13..ec520ad207 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
@@ -20,6 +20,7 @@ import static 
org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer.RaftServe
 import static 
org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer.RaftServerStatus.NOT_LEADER;
 import static 
org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils.createClientRequest;
 import static 
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type.PrepareStatus;
+import static org.apache.hadoop.util.MetricUtil.captureLatencyNs;
 
 import java.io.IOException;
 import java.util.concurrent.ExecutionException;
@@ -30,6 +31,7 @@ import 
org.apache.hadoop.hdds.server.OzoneProtocolMessageDispatcher;
 import org.apache.hadoop.hdds.tracing.TracingUtil;
 import org.apache.hadoop.hdds.utils.ProtocolMessageMetrics;
 import org.apache.hadoop.ozone.OmUtils;
+import org.apache.hadoop.ozone.om.OMPerformanceMetrics;
 import org.apache.hadoop.ozone.om.OzoneManager;
 import org.apache.hadoop.ozone.om.exceptions.OMException;
 import org.apache.hadoop.ozone.om.exceptions.OMLeaderNotReadyException;
@@ -76,6 +78,7 @@ public class OzoneManagerProtocolServerSideTranslatorPB 
implements
   private final OzoneProtocolMessageDispatcher<OMRequest, OMResponse,
       ProtocolMessageEnum> dispatcher;
   private final RequestValidations requestValidations;
+  private final OMPerformanceMetrics perfMetrics;
 
   // always true, only used in tests
   private boolean shouldFlushCache = true;
@@ -92,6 +95,7 @@ public class OzoneManagerProtocolServerSideTranslatorPB 
implements
       boolean enableRatis,
       long lastTransactionIndexForNonRatis) {
     this.ozoneManager = impl;
+    this.perfMetrics = impl.getPerfMetrics();
     this.isRatisEnabled = enableRatis;
     // Update the transactionIndex with the last TransactionIndex read from DB.
     // New requests should have transactionIndex incremented from this index
@@ -140,7 +144,9 @@ public class OzoneManagerProtocolServerSideTranslatorPB 
implements
       OMRequest request) throws ServiceException {
     OMRequest validatedRequest;
     try {
-      validatedRequest = requestValidations.validateRequest(request);
+      validatedRequest = captureLatencyNs(
+          perfMetrics.getValidateRequestLatencyNs(),
+          () -> requestValidations.validateRequest(request));
     } catch (Exception e) {
       if (e instanceof OMException) {
         return createErrorResponse(request, (OMException) e);
@@ -149,16 +155,14 @@ public class OzoneManagerProtocolServerSideTranslatorPB 
implements
     }
 
     OMResponse response = dispatcher.processRequest(validatedRequest,
-        this::processRequest,
-        request.getCmdType(),
-        request.getTraceID());
+        this::processRequest, request.getCmdType(), request.getTraceID());
 
-    return requestValidations.validateResponse(request, response);
+    return captureLatencyNs(perfMetrics.getValidateResponseLatencyNs(),
+        () -> requestValidations.validateResponse(request, response));
   }
 
   @VisibleForTesting
   public OMResponse processRequest(OMRequest request) throws ServiceException {
-    OMClientRequest omClientRequest = null;
     boolean s3Auth = false;
 
     try {
@@ -187,13 +191,16 @@ public class OzoneManagerProtocolServerSideTranslatorPB 
implements
       if (!s3Auth) {
         OzoneManagerRatisUtils.checkLeaderStatus(ozoneManager);
       }
+      OMClientRequest omClientRequest = null;
+      OMRequest requestToSubmit;
       try {
         omClientRequest = createClientRequest(request, ozoneManager);
         // TODO: Note: Due to HDDS-6055, createClientRequest() could now
         //  return null, which triggered the findbugs warning.
         //  Added the assertion.
         assert (omClientRequest != null);
-        request = omClientRequest.preExecute(ozoneManager);
+        OMClientRequest finalOmClientRequest = omClientRequest;
+        requestToSubmit = preExecute(finalOmClientRequest);
       } catch (IOException ex) {
         if (omClientRequest != null) {
           omClientRequest.handleRequestFailure(ozoneManager);
@@ -201,7 +208,7 @@ public class OzoneManagerProtocolServerSideTranslatorPB 
implements
         return createErrorResponse(request, ex);
       }
 
-      OMResponse response = submitRequestToRatis(request);
+      OMResponse response = submitRequestToRatis(requestToSubmit);
       if (!response.getSuccess()) {
         omClientRequest.handleRequestFailure(ozoneManager);
       }
@@ -211,6 +218,12 @@ public class OzoneManagerProtocolServerSideTranslatorPB 
implements
     }
   }
 
+  private OMRequest preExecute(OMClientRequest finalOmClientRequest)
+      throws IOException {
+    return captureLatencyNs(perfMetrics.getPreExecuteLatencyNs(),
+        () -> finalOmClientRequest.preExecute(ozoneManager));
+  }
+
   /**
    * Submits request to OM's Ratis server.
    */
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerRequestHandler.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerRequestHandler.java
index 080f48cfcc..30333f735e 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerRequestHandler.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerRequestHandler.java
@@ -151,6 +151,7 @@ import static 
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.
 import static 
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.MultipartUploadInfo;
 import static 
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OzoneAclInfo;
 import static 
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PartInfo;
+import static org.apache.hadoop.util.MetricUtil.captureLatencyNs;
 
 import org.apache.hadoop.ozone.upgrade.UpgradeFinalizer.StatusAndMessages;
 import org.apache.hadoop.util.ProtobufUtils;
@@ -374,14 +375,12 @@ public class OzoneManagerRequestHandler implements 
RequestHandler {
   @Override
   public OMClientResponse handleWriteRequest(OMRequest omRequest,
       long transactionLogIndex) throws IOException {
-    OMClientRequest omClientRequest = null;
-    OMClientResponse omClientResponse = null;
-    omClientRequest =
+    OMClientRequest omClientRequest =
         OzoneManagerRatisUtils.createClientRequest(omRequest, impl);
-    omClientResponse = omClientRequest
-        .validateAndUpdateCache(getOzoneManager(), transactionLogIndex,
-            ozoneManagerDoubleBuffer::add);
-    return omClientResponse;
+    return captureLatencyNs(
+        impl.getPerfMetrics().getValidateAndUpdateCacneLatencyNs(),
+        () -> omClientRequest.validateAndUpdateCache(getOzoneManager(),
+            transactionLogIndex, ozoneManagerDoubleBuffer::add));
   }
 
   @Override


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to