This is an automated email from the ASF dual-hosted git repository.
ivandika pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new a5658eebf7f HDDS-9279. Basic implementation of OM Follower read (#9222)
a5658eebf7f is described below
commit a5658eebf7fd87380d1d5eab301350bafcfe3a3e
Author: Symious <[email protected]>
AuthorDate: Tue Nov 18 10:35:28 2025 +0800
HDDS-9279. Basic implementation of OM Follower read (#9222)
---
.../ozone/om/ha/OMFailoverProxyProviderBase.java | 4 +-
.../apache/hadoop/ozone/freon/FollowerReader.java | 96 ++++++++++++++++++++++
.../hadoop/ozone/om/TestOMRatisSnapshots.java | 1 +
.../snapshot/TestSnapshotBackgroundServices.java | 1 +
.../hadoop/ozone/om/execution/OMExecutionFlow.java | 8 +-
.../ozone/om/ratis/OzoneManagerRatisServer.java | 20 +++--
.../om/ratis/OzoneManagerRatisServerConfig.java | 37 +++++++++
...OzoneManagerProtocolServerSideTranslatorPB.java | 6 +-
.../hadoop/ozone/om/failover/TestOMFailovers.java | 2 +-
9 files changed, 162 insertions(+), 13 deletions(-)
diff --git
a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProviderBase.java
b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProviderBase.java
index 356d3fb1eff..28c5597196e 100644
---
a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProviderBase.java
+++
b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProviderBase.java
@@ -24,6 +24,7 @@
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
@@ -445,7 +446,8 @@ protected synchronized void setOmProxies(Map<String,
}
protected synchronized void setOmNodeIDList(List<String> omNodeIDList) {
- this.omNodeIDList = omNodeIDList;
+ Collections.shuffle(omNodeIDList);
+ this.omNodeIDList = Collections.unmodifiableList(omNodeIDList);
}
protected synchronized List<String> getOmNodeIDList() {
diff --git
a/hadoop-ozone/freon/src/main/java/org/apache/hadoop/ozone/freon/FollowerReader.java
b/hadoop-ozone/freon/src/main/java/org/apache/hadoop/ozone/freon/FollowerReader.java
new file mode 100644
index 00000000000..ee5939e4c5e
--- /dev/null
+++
b/hadoop-ozone/freon/src/main/java/org/apache/hadoop/ozone/freon/FollowerReader.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.freon;
+
+import com.codahale.metrics.Timer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.Callable;
+import org.apache.hadoop.hdds.cli.HddsVersionProvider;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.client.OzoneClient;
+import org.kohsuke.MetaInfServices;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import picocli.CommandLine;
+import picocli.CommandLine.Command;
+
+/**
+ * Freon tool that tests OM read performance by reading one key's size from multiple threads.
+ */
+@Command(name = "fr",
+ aliases = "follower-reader",
+ description = "Read the same keySize from multiple threads.",
+ versionProvider = HddsVersionProvider.class,
+ mixinStandardHelpOptions = true,
+ showDefaultValues = true)
+@MetaInfServices(FreonSubcommand.class)
+public class FollowerReader extends BaseFreonGenerator
+ implements Callable<Void> {
+
+ private static final Logger LOG =
+ LoggerFactory.getLogger(FollowerReader.class);
+
+ @CommandLine.Option(names = {"-v", "--volume"},
+ description = "Name of the bucket which contains the test data. Will be"
+ + " created if missing.",
+ defaultValue = "vol1")
+ private String volumeName;
+
+ @CommandLine.Option(names = {"-b", "--bucket"},
+ description = "Name of the bucket which contains the test data.",
+ defaultValue = "bucket1")
+ private String bucketName;
+
+ @CommandLine.Option(names = {"-k", "--key"},
+ description = "Name of the key which contains the test data.",
+ defaultValue = "key1")
+ private String keyName;
+
+ private String omServiceID = null;
+
+ private Timer timer;
+
+ private final List<OzoneClient> rpcClients = new ArrayList<>();
+
+ @Override
+ public Void call() throws Exception {
+ init();
+ OzoneConfiguration ozoneConfiguration = createOzoneConfiguration();
+
+ for (int i = 0; i < getThreadNo(); i++) {
+ OzoneClient rpcClient = createOzoneClient(omServiceID,
ozoneConfiguration);
+ rpcClients.add(rpcClient);
+ }
+
+ timer = getMetrics().timer("follower-read");
+
+ runTests(this::readKeySize);
+ return null;
+ }
+
+ private void readKeySize(long counter) throws Exception {
+ int clientIdx = (int) (counter % rpcClients.size());
+ timer.time(() -> {
+ long unused =
rpcClients.get(clientIdx).getObjectStore().getVolume(volumeName)
+ .getBucket(bucketName).getKey(keyName).getDataSize();
+ return null;
+ });
+ }
+
+}
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java
index 051addfd8ff..8fb519bd8f2 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java
@@ -151,6 +151,7 @@ public void init(TestInfo testInfo) throws Exception {
conf.getObject(OzoneManagerRatisServerConfig.class);
omRatisConf.setLogAppenderWaitTimeMin(10);
conf.setFromObject(omRatisConf);
+ conf.set("ozone.om.client.rpc.timeout", "1m");
cluster = MiniOzoneCluster.newHABuilder(conf)
.setOMServiceId("om-service-test1")
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotBackgroundServices.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotBackgroundServices.java
index 635002aa098..ad42cc35845 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotBackgroundServices.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotBackgroundServices.java
@@ -111,6 +111,7 @@ public void init(TestInfo testInfo) throws Exception {
OzoneManagerRatisServerConfig omRatisConf =
conf.getObject(OzoneManagerRatisServerConfig.class);
omRatisConf.setLogAppenderWaitTimeMin(10);
conf.setFromObject(omRatisConf);
+ conf.set("ozone.om.client.rpc.timeout", "1m");
conf.setInt(OMConfigKeys.OZONE_OM_RATIS_LOG_PURGE_GAP, LOG_PURGE_GAP);
conf.setStorageSize(OMConfigKeys.OZONE_OM_RATIS_SEGMENT_SIZE_KEY, 16,
StorageUnit.KB);
conf.setStorageSize(OMConfigKeys.OZONE_OM_RATIS_SEGMENT_PREALLOCATED_SIZE_KEY,
16, StorageUnit.KB);
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/execution/OMExecutionFlow.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/execution/OMExecutionFlow.java
index 4ce714ab3dc..497cd565caa 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/execution/OMExecutionFlow.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/execution/OMExecutionFlow.java
@@ -49,12 +49,12 @@ public OMExecutionFlow(OzoneManager om) {
* @return OMResponse the response of execution
* @throws ServiceException the exception on execution
*/
- public OMResponse submit(OMRequest omRequest) throws ServiceException {
+ public OMResponse submit(OMRequest omRequest, boolean isWrite) throws
ServiceException {
// TODO: currently only execution after Ratis submission is supported; with a
new flow a switch can be added later
- return submitExecutionToRatis(omRequest);
+ return submitExecutionToRatis(omRequest, isWrite);
}
- private OMResponse submitExecutionToRatis(OMRequest request) throws
ServiceException {
+ private OMResponse submitExecutionToRatis(OMRequest request, boolean
isWrite) throws ServiceException {
// 1. create client request and preExecute
OMClientRequest omClientRequest = null;
final OMRequest requestToSubmit;
@@ -73,7 +73,7 @@ private OMResponse submitExecutionToRatis(OMRequest request)
throws ServiceExcep
}
// 2. submit request to ratis
- OMResponse response =
ozoneManager.getOmRatisServer().submitRequest(requestToSubmit);
+ OMResponse response =
ozoneManager.getOmRatisServer().submitRequest(requestToSubmit, isWrite);
if (!response.getSuccess()) {
omClientRequest.handleRequestFailure(ozoneManager);
}
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
index e3beae915d8..917ae342818 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
@@ -92,6 +92,7 @@
import org.apache.ratis.rpc.SupportedRpcType;
import org.apache.ratis.server.RaftServer;
import org.apache.ratis.server.RaftServerConfigKeys;
+import org.apache.ratis.server.RaftServerConfigKeys.Read;
import org.apache.ratis.server.RetryCache;
import org.apache.ratis.server.protocol.TermIndex;
import org.apache.ratis.server.storage.RaftStorage;
@@ -124,6 +125,7 @@ public final class OzoneManagerRatisServer {
private final ClientId clientId = ClientId.randomId();
private static final AtomicLong CALL_ID_COUNTER = new AtomicLong();
+ private final Read.Option readOption;
private static long nextCallId() {
return CALL_ID_COUNTER.getAndIncrement() & Long.MAX_VALUE;
@@ -171,6 +173,8 @@ private OzoneManagerRatisServer(ConfigurationSource conf,
OzoneManager om,
}
this.omStateMachine = getStateMachine(conf);
+ this.readOption = RaftServerConfigKeys.Read.option(serverProperties);
+
Parameters parameters = createServerTlsParameters(secConfig, certClient);
this.server = RaftServer.newBuilder()
.setServerId(this.raftPeerId)
@@ -239,11 +243,11 @@ public static OzoneManagerRatisServer newOMRatisServer(
* @return OMResponse - response returned to the client.
* @throws ServiceException
*/
- public OMResponse submitRequest(OMRequest omRequest) throws ServiceException
{
+ public OMResponse submitRequest(OMRequest omRequest, boolean isWrite) throws
ServiceException {
// In prepare mode, only prepare and cancel requests are allowed to go
// through.
if (ozoneManager.getPrepareState().requestAllowed(omRequest.getCmdType()))
{
- RaftClientRequest raftClientRequest = createRaftRequest(omRequest);
+ RaftClientRequest raftClientRequest = createRaftRequest(omRequest,
isWrite);
RaftClientReply raftClientReply =
submitRequestToRatis(raftClientRequest);
return createOmResponse(omRequest, raftClientReply);
} else {
@@ -277,10 +281,10 @@ private RaftClientReply submitRequestToRatis(
() -> submitRequestToRatisImpl(raftClientRequest));
}
- private RaftClientRequest createRaftRequest(OMRequest omRequest) {
+ private RaftClientRequest createRaftRequest(OMRequest omRequest, boolean
isWrite) {
return captureLatencyNs(
perfMetrics.getCreateRatisRequestLatencyNs(),
- () -> createRaftRequestImpl(omRequest));
+ () -> createRaftRequestImpl(omRequest, isWrite));
}
/**
@@ -500,7 +504,7 @@ public void removeRaftPeer(OMNodeDetails omNodeDetails) {
* @return RaftClientRequest - Raft Client request which is submitted to
* ratis server.
*/
- private RaftClientRequest createRaftRequestImpl(OMRequest omRequest) {
+ private RaftClientRequest createRaftRequestImpl(OMRequest omRequest, boolean
isWrite) {
return RaftClientRequest.newBuilder()
.setClientId(getClientId())
.setServerId(server.getId())
@@ -509,7 +513,7 @@ private RaftClientRequest createRaftRequestImpl(OMRequest
omRequest) {
.setMessage(
Message.valueOf(
OMRatisHelper.convertRequestToByteString(omRequest)))
- .setType(RaftClientRequest.writeRequestType())
+ .setType(isWrite ? RaftClientRequest.writeRequestType() :
RaftClientRequest.readRequestType())
.build();
}
@@ -647,6 +651,10 @@ public RaftServer.Division getServerDivision() {
return serverDivision.get();
}
+ public boolean isLinearizableRead() {
+ return readOption == Read.Option.LINEARIZABLE;
+ }
+
/**
* Initializes and returns OzoneManager StateMachine.
*/
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServerConfig.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServerConfig.java
index a3703dfe63b..2624fc8b2b8 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServerConfig.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServerConfig.java
@@ -52,6 +52,27 @@ public class OzoneManagerRatisServerConfig {
)
private long retryCacheTimeout = Duration.ofSeconds(300).toMillis();
+ @Config(key = "read.option",
+ defaultValue = "DEFAULT",
+ type = ConfigType.STRING,
+ tags = {OZONE, OM, RATIS, PERFORMANCE},
+ description = "Select the Ratis server read option." +
+ " Possible values are: " +
+ " DEFAULT - Directly query statemachine (non-linearizable). "
+
+ " Only the leader can serve read requests. " +
+ " LINEARIZABLE - Use ReadIndex (see Raft Paper section 6.4) to
maintain linearizability. " +
+ " Both the leader and the followers can serve read requests."
+ )
+ private String readOption;
+
+ @Config(key = "read.leader.lease.enabled",
+ defaultValue = "false",
+ type = ConfigType.BOOLEAN,
+ tags = {OZONE, OM, RATIS, PERFORMANCE},
+ description = "If we enabled the leader lease on Ratis Leader."
+ )
+ private boolean readLeaderLeaseEnabled;
+
public long getLogAppenderWaitTimeMin() {
return logAppenderWaitTimeMin;
}
@@ -67,4 +88,20 @@ public long getRetryCacheTimeout() {
public void setRetryCacheTimeout(Duration duration) {
this.retryCacheTimeout = duration.toMillis();
}
+
+ public String getReadOption() {
+ return readOption;
+ }
+
+ public void setReadOption(String option) {
+ this.readOption = option;
+ }
+
+ public boolean isReadLeaderLeaseEnabled() {
+ return readLeaderLeaseEnabled;
+ }
+
+ public void setReadLeaderLeaseEnabled(boolean readLeaderLeaseEnabled) {
+ this.readLeaderLeaseEnabled = readLeaderLeaseEnabled;
+ }
}
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
index 251e81e83ed..69e4e6d8f1a 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
@@ -171,7 +171,7 @@ private OMResponse internalProcessRequest(OMRequest
request) throws ServiceExcep
}
this.lastRequestToSubmit = request;
- return ozoneManager.getOmExecutionFlow().submit(request);
+ return ozoneManager.getOmExecutionFlow().submit(request, true);
} finally {
OzoneManager.setS3Auth(null);
}
@@ -184,6 +184,10 @@ public OMRequest getLastRequestToSubmit() {
private OMResponse submitReadRequestToOM(OMRequest request)
throws ServiceException {
+ // Read from leader or followers using linearizable read
+ if (omRatisServer.isLinearizableRead()) {
+ return ozoneManager.getOmExecutionFlow().submit(request, false);
+ }
// Check if this OM is the leader.
RaftServerStatus raftServerStatus = omRatisServer.getLeaderStatus();
if (raftServerStatus == LEADER_AND_READY ||
diff --git
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/failover/TestOMFailovers.java
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/failover/TestOMFailovers.java
index 4e2b0c7c66a..0de7052c4ed 100644
---
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/failover/TestOMFailovers.java
+++
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/failover/TestOMFailovers.java
@@ -74,7 +74,7 @@ public void testAccessContorlExceptionFailovers() throws
Exception {
// Request should try all OMs one by one and fail when the last OM also
// throws AccessControlException.
assertThat(serviceException).hasCauseInstanceOf(AccessControlException.class)
- .hasMessage("ServiceException of type class
org.apache.hadoop.security.AccessControlException for om3");
+ .hasMessageStartingWith("ServiceException of type class
org.apache.hadoop.security.AccessControlException");
assertThat(logCapturer.getOutput()).contains(getRetryProxyDebugMsg("om1"));
assertThat(logCapturer.getOutput()).contains(getRetryProxyDebugMsg("om2"));
assertThat(logCapturer.getOutput()).contains(getRetryProxyDebugMsg("om3"));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]