This is an automated email from the ASF dual-hosted git repository.
duong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 549da73d0f HDDS-9425. Breakdown metrics for OM writes (#5421)
549da73d0f is described below
commit 549da73d0f836a6adae3e5adebf9d5af9ee766ed
Author: Duong Nguyen <[email protected]>
AuthorDate: Fri Oct 20 16:19:52 2023 -0700
HDDS-9425. Breakdown metrics for OM writes (#5421)
---
.../java/org/apache/hadoop/util/MetricUtil.java | 2 +-
.../hadoop/ozone/om/OMPerformanceMetrics.java | 50 ++++++++++++++++++++++
.../ozone/om/ratis/OzoneManagerRatisServer.java | 44 ++++++++++++++-----
...OzoneManagerProtocolServerSideTranslatorPB.java | 29 +++++++++----
.../protocolPB/OzoneManagerRequestHandler.java | 13 +++---
5 files changed, 112 insertions(+), 26 deletions(-)
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/util/MetricUtil.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/util/MetricUtil.java
index 5f791c17c4..879f8ed0cc 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/util/MetricUtil.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/util/MetricUtil.java
@@ -31,7 +31,7 @@ public final class MetricUtil {
private MetricUtil() {
}
- public static <T, E extends IOException> T captureLatencyNs(
+ public static <T, E extends Exception> T captureLatencyNs(
MutableRate metric,
CheckedSupplier<T, E> block) throws E {
long start = Time.monotonicNowNanos();
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMPerformanceMetrics.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMPerformanceMetrics.java
index 5b0829a088..25223c502c 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMPerformanceMetrics.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMPerformanceMetrics.java
@@ -92,6 +92,28 @@ public class OMPerformanceMetrics {
@Metric(about = "listKeys latency in nanoseconds")
private MutableRate listKeysLatencyNs;
+ @Metric(about = "Validate request latency in nano seconds")
+ private MutableRate validateRequestLatencyNs;
+
+ @Metric(about = "Validate response latency in nano seconds")
+ private MutableRate validateResponseLatencyNs;
+
+ @Metric(about = "PreExecute latency in nano seconds")
+ private MutableRate preExecuteLatencyNs;
+
+ @Metric(about = "Ratis latency in nano seconds")
+ private MutableRate submitToRatisLatencyNs;
+
+ @Metric(about = "Convert om request to ratis request nano seconds")
+ private MutableRate createRatisRequestLatencyNs;
+
+ @Metric(about = "Convert ratis response to om response nano seconds")
+ private MutableRate createOmResoonseLatencyNs;
+
+ @Metric(about = "Ratis local command execution latency in nano seconds")
+ private MutableRate validateAndUpdateCacneLatencyNs;
+
+
public void addLookupLatency(long latencyInNs) {
lookupLatencyNs.add(latencyInNs);
}
@@ -160,4 +182,32 @@ public class OMPerformanceMetrics {
public void addListKeysLatencyNs(long latencyInNs) {
listKeysLatencyNs.add(latencyInNs);
}
+
+ public MutableRate getValidateRequestLatencyNs() {
+ return validateRequestLatencyNs;
+ }
+
+ public MutableRate getValidateResponseLatencyNs() {
+ return validateResponseLatencyNs;
+ }
+
+ public MutableRate getPreExecuteLatencyNs() {
+ return preExecuteLatencyNs;
+ }
+
+ public MutableRate getSubmitToRatisLatencyNs() {
+ return submitToRatisLatencyNs;
+ }
+
+ public MutableRate getCreateRatisRequestLatencyNs() {
+ return createRatisRequestLatencyNs;
+ }
+
+ public MutableRate getCreateOmResponseLatencyNs() {
+ return createOmResoonseLatencyNs;
+ }
+
+ public MutableRate getValidateAndUpdateCacneLatencyNs() {
+ return validateAndUpdateCacneLatencyNs;
+ }
}
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
index 77e2599be7..712dff4073 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
@@ -50,6 +50,7 @@ import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient
import org.apache.hadoop.hdds.tracing.TracingUtil;
import org.apache.hadoop.ipc.ProtobufRpcEngine.Server;
import org.apache.hadoop.ozone.om.OMConfigKeys;
+import org.apache.hadoop.ozone.om.OMPerformanceMetrics;
import org.apache.hadoop.ozone.om.OzoneManager;
import org.apache.hadoop.ozone.om.exceptions.OMException;
import org.apache.hadoop.ozone.om.exceptions.OMLeaderNotReadyException;
@@ -96,6 +97,7 @@ import static org.apache.hadoop.ipc.RpcConstants.DUMMY_CLIENT_ID;
import static org.apache.hadoop.ipc.RpcConstants.INVALID_CALL_ID;
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_HA_PREFIX;
import static org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils.createServerTlsConfig;
+import static org.apache.hadoop.util.MetricUtil.captureLatencyNs;
/**
* Creates a Ratis server endpoint for OM.
@@ -115,6 +117,7 @@ public final class OzoneManagerRatisServer {
private final OzoneManager ozoneManager;
private final OzoneManagerStateMachine omStateMachine;
private final String ratisStorageDir;
+ private final OMPerformanceMetrics perfMetrics;
private final ClientId clientId = ClientId.randomId();
private static final AtomicLong CALL_ID_COUNTER = new AtomicLong();
@@ -174,6 +177,7 @@ public final class OzoneManagerRatisServer {
.setParameters(parameters)
.setStateMachine(omStateMachine)
.build();
+ this.perfMetrics = om.getPerfMetrics();
}
/**
@@ -246,11 +250,9 @@ public final class OzoneManagerRatisServer {
// In prepare mode, only prepare and cancel requests are allowed to go
// through.
if (ozoneManager.getPrepareState().requestAllowed(omRequest.getCmdType())) {
- RaftClientRequest raftClientRequest =
- createWriteRaftClientRequest(omRequest);
+ RaftClientRequest raftClientRequest = createRaftRequest(omRequest);
RaftClientReply raftClientReply = submitRequestToRatis(raftClientRequest);
-
- return processReply(omRequest, raftClientReply);
+ return createOmResponse(omRequest, raftClientReply);
} else {
LOG.info("Rejecting write request on OM {} because it is in prepare " +
"mode: {}", ozoneManager.getOMNodeId(),
@@ -268,6 +270,27 @@ public final class OzoneManagerRatisServer {
}
}
+ private OMResponse createOmResponse(OMRequest omRequest,
+ RaftClientReply raftClientReply) throws ServiceException {
+ return captureLatencyNs(
+ perfMetrics.getCreateOmResponseLatencyNs(),
+ () -> createOmResponseImpl(omRequest, raftClientReply));
+ }
+
+ private RaftClientReply submitRequestToRatis(
+ RaftClientRequest raftClientRequest) throws ServiceException {
+ return captureLatencyNs(
+ perfMetrics.getSubmitToRatisLatencyNs(),
+ () -> submitRequestToRatisImpl(raftClientRequest));
+ }
+
+ private RaftClientRequest createRaftRequest(OMRequest omRequest) {
+ RaftClientRequest raftClientRequest = captureLatencyNs(
+ perfMetrics.getCreateRatisRequestLatencyNs(),
+ () -> createRaftRequestImpl(omRequest));
+ return raftClientRequest;
+ }
+
/**
* API used internally from OzoneManager Server when requests needs to be
* submitted to ratis, where the crafted RaftClientRequest is passed along.
@@ -278,11 +301,12 @@ public final class OzoneManagerRatisServer {
*/
public OMResponse submitRequest(OMRequest omRequest,
RaftClientRequest raftClientRequest) throws ServiceException {
- RaftClientReply raftClientReply = submitRequestToRatis(raftClientRequest);
- return processReply(omRequest, raftClientReply);
+ RaftClientReply raftClientReply =
+ submitRequestToRatis(raftClientRequest);
+ return createOmResponse(omRequest, raftClientReply);
}
- private RaftClientReply submitRequestToRatis(
+ private RaftClientReply submitRequestToRatisImpl(
RaftClientRequest raftClientRequest) throws ServiceException {
try {
return server.submitClientRequestAsync(raftClientRequest)
@@ -420,7 +444,7 @@ public final class OzoneManagerRatisServer {
* @return RaftClientRequest - Raft Client request which is submitted to
* ratis server.
*/
- private RaftClientRequest createWriteRaftClientRequest(OMRequest omRequest) {
+ private RaftClientRequest createRaftRequestImpl(OMRequest omRequest) {
if (!ozoneManager.isTestSecureOmFlag()) {
Preconditions.checkArgument(Server.getClientId() != DUMMY_CLIENT_ID);
Preconditions.checkArgument(Server.getCallId() != INVALID_CALL_ID);
@@ -445,8 +469,8 @@ public final class OzoneManagerRatisServer {
* @return OMResponse - response which is returned to client.
* @throws ServiceException
*/
- private OMResponse processReply(OMRequest omRequest, RaftClientReply reply)
- throws ServiceException {
+ private OMResponse createOmResponseImpl(OMRequest omRequest,
+ RaftClientReply reply) throws ServiceException {
// NotLeader exception is thrown only when the raft server to which the
// request is submitted is not the leader. This can happen first time
// when client is submitting request to OM.
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
index d26cce0c13..ec520ad207 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
@@ -20,6 +20,7 @@ import static org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer.RaftServe
import static org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer.RaftServerStatus.NOT_LEADER;
import static org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils.createClientRequest;
import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type.PrepareStatus;
+import static org.apache.hadoop.util.MetricUtil.captureLatencyNs;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
@@ -30,6 +31,7 @@ import org.apache.hadoop.hdds.server.OzoneProtocolMessageDispatcher;
import org.apache.hadoop.hdds.tracing.TracingUtil;
import org.apache.hadoop.hdds.utils.ProtocolMessageMetrics;
import org.apache.hadoop.ozone.OmUtils;
+import org.apache.hadoop.ozone.om.OMPerformanceMetrics;
import org.apache.hadoop.ozone.om.OzoneManager;
import org.apache.hadoop.ozone.om.exceptions.OMException;
import org.apache.hadoop.ozone.om.exceptions.OMLeaderNotReadyException;
@@ -76,6 +78,7 @@ public class OzoneManagerProtocolServerSideTranslatorPB implements
private final OzoneProtocolMessageDispatcher<OMRequest, OMResponse,
ProtocolMessageEnum> dispatcher;
private final RequestValidations requestValidations;
+ private final OMPerformanceMetrics perfMetrics;
// always true, only used in tests
private boolean shouldFlushCache = true;
@@ -92,6 +95,7 @@ public class OzoneManagerProtocolServerSideTranslatorPB implements
boolean enableRatis,
long lastTransactionIndexForNonRatis) {
this.ozoneManager = impl;
+ this.perfMetrics = impl.getPerfMetrics();
this.isRatisEnabled = enableRatis;
// Update the transactionIndex with the last TransactionIndex read from DB.
// New requests should have transactionIndex incremented from this index
@@ -140,7 +144,9 @@ public class OzoneManagerProtocolServerSideTranslatorPB implements
OMRequest request) throws ServiceException {
OMRequest validatedRequest;
try {
- validatedRequest = requestValidations.validateRequest(request);
+ validatedRequest = captureLatencyNs(
+ perfMetrics.getValidateRequestLatencyNs(),
+ () -> requestValidations.validateRequest(request));
} catch (Exception e) {
if (e instanceof OMException) {
return createErrorResponse(request, (OMException) e);
@@ -149,16 +155,14 @@ public class OzoneManagerProtocolServerSideTranslatorPB implements
}
OMResponse response = dispatcher.processRequest(validatedRequest,
- this::processRequest,
- request.getCmdType(),
- request.getTraceID());
+ this::processRequest, request.getCmdType(), request.getTraceID());
- return requestValidations.validateResponse(request, response);
+ return captureLatencyNs(perfMetrics.getValidateResponseLatencyNs(),
+ () -> requestValidations.validateResponse(request, response));
}
@VisibleForTesting
public OMResponse processRequest(OMRequest request) throws ServiceException {
- OMClientRequest omClientRequest = null;
boolean s3Auth = false;
try {
@@ -187,13 +191,16 @@ public class OzoneManagerProtocolServerSideTranslatorPB implements
if (!s3Auth) {
OzoneManagerRatisUtils.checkLeaderStatus(ozoneManager);
}
+ OMClientRequest omClientRequest = null;
+ OMRequest requestToSubmit;
try {
omClientRequest = createClientRequest(request, ozoneManager);
// TODO: Note: Due to HDDS-6055, createClientRequest() could now
// return null, which triggered the findbugs warning.
// Added the assertion.
assert (omClientRequest != null);
- request = omClientRequest.preExecute(ozoneManager);
+ OMClientRequest finalOmClientRequest = omClientRequest;
+ requestToSubmit = preExecute(finalOmClientRequest);
} catch (IOException ex) {
if (omClientRequest != null) {
omClientRequest.handleRequestFailure(ozoneManager);
@@ -201,7 +208,7 @@ public class OzoneManagerProtocolServerSideTranslatorPB implements
return createErrorResponse(request, ex);
}
- OMResponse response = submitRequestToRatis(request);
+ OMResponse response = submitRequestToRatis(requestToSubmit);
if (!response.getSuccess()) {
omClientRequest.handleRequestFailure(ozoneManager);
}
@@ -211,6 +218,12 @@ public class OzoneManagerProtocolServerSideTranslatorPB implements
}
}
+ private OMRequest preExecute(OMClientRequest finalOmClientRequest)
+ throws IOException {
+ return captureLatencyNs(perfMetrics.getPreExecuteLatencyNs(),
+ () -> finalOmClientRequest.preExecute(ozoneManager));
+ }
+
/**
* Submits request to OM's Ratis server.
*/
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerRequestHandler.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerRequestHandler.java
index 080f48cfcc..30333f735e 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerRequestHandler.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerRequestHandler.java
@@ -151,6 +151,7 @@ import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.
import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.MultipartUploadInfo;
import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OzoneAclInfo;
import static org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PartInfo;
+import static org.apache.hadoop.util.MetricUtil.captureLatencyNs;
import org.apache.hadoop.ozone.upgrade.UpgradeFinalizer.StatusAndMessages;
import org.apache.hadoop.util.ProtobufUtils;
@@ -374,14 +375,12 @@ public class OzoneManagerRequestHandler implements RequestHandler {
@Override
public OMClientResponse handleWriteRequest(OMRequest omRequest,
long transactionLogIndex) throws IOException {
- OMClientRequest omClientRequest = null;
- OMClientResponse omClientResponse = null;
- omClientRequest =
+ OMClientRequest omClientRequest =
OzoneManagerRatisUtils.createClientRequest(omRequest, impl);
- omClientResponse = omClientRequest
- .validateAndUpdateCache(getOzoneManager(), transactionLogIndex,
- ozoneManagerDoubleBuffer::add);
- return omClientResponse;
+ return captureLatencyNs(
+ impl.getPerfMetrics().getValidateAndUpdateCacneLatencyNs(),
+ () -> omClientRequest.validateAndUpdateCache(getOzoneManager(),
+ transactionLogIndex, ozoneManagerDoubleBuffer::add));
}
@Override
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]