ivandika3 commented on code in PR #7988: URL: https://github.com/apache/ozone/pull/7988#discussion_r2028589613
########## hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java: ########## @@ -1009,14 +1009,29 @@ public void deleteSnapshot(String volumeName, public OzoneSnapshot getSnapshotInfo(String volumeName, String bucketName, String snapshotName) throws IOException { + return getSnapshotInfo(volumeName, bucketName, snapshotName, null); + } + + /** + * {@inheritDoc} + */ + @Override + public OzoneSnapshot getSnapshotInfo(String volumeName, + String bucketName, + String snapshotName, + String omNodeId) throws IOException { Preconditions.checkArgument(StringUtils.isNotBlank(volumeName), "volume can't be null or empty."); Preconditions.checkArgument(StringUtils.isNotBlank(bucketName), "bucket can't be null or empty."); Preconditions.checkArgument(StringUtils.isNotBlank(snapshotName), "snapshot name can't be null or empty."); + if (StringUtils.isNotBlank(omNodeId) && Review Comment: I think we can let this be since the IOException message is different for different snapshot API calls, abstracting to a function also need to pass a string like "snapshot read", "snapshot diff", etc. I can abstract out the "if condition" if needed. ########## hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/SingleOMFailoverProxyProviderBase.java: ########## @@ -0,0 +1,252 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.om.ha; + +import static org.apache.hadoop.ozone.om.ha.OMFailoverProxyProviderBase.getLeaderNotReadyException; +import static org.apache.hadoop.ozone.om.ha.OMFailoverProxyProviderBase.getNotLeaderException; + +import com.google.common.base.Preconditions; +import com.google.protobuf.ServiceException; +import java.io.IOException; +import java.net.InetSocketAddress; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.HddsUtils; +import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.utils.LegacyHadoopConfigurationSource; +import org.apache.hadoop.io.retry.FailoverProxyProvider; +import org.apache.hadoop.io.retry.RetryPolicies; +import org.apache.hadoop.io.retry.RetryPolicy; +import org.apache.hadoop.io.retry.RetryPolicy.RetryAction.RetryDecision; +import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.ipc.RemoteException; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.ozone.OmUtils; +import org.apache.hadoop.ozone.OzoneConfigKeys; +import org.apache.hadoop.ozone.om.exceptions.OMException; +import org.apache.hadoop.ozone.om.exceptions.OMLeaderNotReadyException; +import org.apache.hadoop.ozone.om.exceptions.OMNodeIdMismatchException; +import org.apache.hadoop.ozone.om.exceptions.OMNotLeaderException; +import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.SecretManager; +import org.apache.ratis.protocol.exceptions.StateMachineException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * An implementation of {@link FailoverProxyProvider} which does nothing in the + * event of OM failover, and always returns the same proxy object. In case of OM failover, + * client will keep retrying to connect to the same OM node. Review Comment: Yes, the client will eventually fail. This proxy provider is specifically used to create a stick to a single OM node to reduce the load on OM leader. Additionally, we want to make the snapdiff status predictable (e.g. after the first snapdiff status is DONE, the subsequent snapdiff API call on the same OM node should also return DONE). This also reduce the overall number of snapdiff jobs in different OM nodes. In the context of Cross-Region Bucket Replication (Incremental Replication), there will be a master process that will detect the OM node failure and will reassign the worker to a run Snapdiff from a new OM node. ########## hadoop-ozone/cli-shell/src/main/java/org/apache/hadoop/ozone/shell/snapshot/SnapshotDiffHandler.java: ########## @@ -91,6 +92,11 @@ public class SnapshotDiffHandler extends Handler { description = "Format output as JSON") private boolean json; + @CommandLine.Option( + names = {"-n", "--om-node-id"}, Review Comment: Yes, otherwise it would be sent to the leader. ########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java: ########## @@ -187,14 +188,50 @@ private OMResponse submitReadRequestToOM(OMRequest request) throws ServiceException { // Check if this OM is the leader. RaftServerStatus raftServerStatus = omRatisServer.checkLeaderStatus(); - if (raftServerStatus == LEADER_AND_READY || - request.getCmdType().equals(PrepareStatus)) { + boolean isNodeIdSpecified = validateNodeId(request); Review Comment: ```java if (!omRequest.getOmNodeId().equals(ozoneManager.getOMNodeId())) { throw new ServiceException(new OMNodeIdMismatchException("OM request node ID is " + omRequest.getOmNodeId() + ", but the current OM node ID is " + ozoneManager.getOMNodeId())); } ``` `validateNodeId` will throw `OmNodeIdMistmatchException` if the OM request specify the OM Node ID but the current OM Node ID is wrong. This is run before the leadership check, so it won't pass to leader. Please let me know if I miss something as well. ########## hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto: ########## @@ -179,6 +179,13 @@ message OMRequest { optional LayoutVersion layoutVersion = 6; + // If omNodeID is specified, this means that this client request is specifically + // made to the particular OM node, regardless whether the OM node is a leader. + // OM needs to rejects the request if the omNodeID is not equal to its own node ID. Review Comment: 1. Currently OM node will reject the request since this means that the OM nodes configuration of the client and the server are not synchronized. Not sure if we can add logic like the current suggested address mechanism. 2. Leadership change should not affect the request, the client will still contact the same node, even though it just became a leader. In the context of Cross Region Bucket Replication, the intent is for the client to stick to an OM listener which will never be a leader, but in the future if a worker can listen to a follower, we can add a reassignment logic to pick another OM follower. However, as mentioned in another comment, we want to support predictability of snapdiff and reduction of the number of snapdiff jobs. ########## hadoop-ozone/ozonefs-hadoop2/src/main/java/org/apache/hadoop/fs/ozone/Hadoop27RpcTransport.java: ########## @@ -101,18 +145,35 @@ public Text getDelegationTokenService() { * network exception or if the current proxy is not the leader OM. */ private OzoneManagerProtocolPB createRetryProxy( - HadoopRpcOMFailoverProxyProvider failoverProxyProvider, + HadoopRpcOMFailoverProxyProvider<OzoneManagerProtocolPB> failoverProxyProvider, int maxFailovers) { - OzoneManagerProtocolPB proxy = (OzoneManagerProtocolPB) RetryProxy.create( + return (OzoneManagerProtocolPB) RetryProxy.create( OzoneManagerProtocolPB.class, failoverProxyProvider, failoverProxyProvider.getRetryPolicy(maxFailovers)); - return proxy; + } + + /** + * Creates a {@link RetryProxy} encapsulating the + * {@link HadoopRpcSingleOMFailoverProxyProvider}. The retry proxy fails over on + * network exception. + */ + private OzoneManagerProtocolPB createRetryProxy( + HadoopRpcSingleOMFailoverProxyProvider<OzoneManagerProtocolPB> singleOMFailoverProxyProvider, + int maxRetry) { + + return (OzoneManagerProtocolPB) RetryProxy.create( + OzoneManagerProtocolPB.class, singleOMFailoverProxyProvider, + singleOMFailoverProxyProvider.getRetryPolicy(maxRetry)); } @Override public void close() throws IOException { omFailoverProxyProvider.close(); + for (HadoopRpcSingleOMFailoverProxyProvider<OzoneManagerProtocolPB> failoverProxyProvider + : omFailoverProxyProviders.values()) { + failoverProxyProvider.close(); + } Review Comment: Thank you for the review. Updated. ########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java: ########## @@ -187,14 +188,50 @@ private OMResponse submitReadRequestToOM(OMRequest request) throws ServiceException { // Check if this OM is the leader. RaftServerStatus raftServerStatus = omRatisServer.checkLeaderStatus(); - if (raftServerStatus == LEADER_AND_READY || - request.getCmdType().equals(PrepareStatus)) { + boolean isNodeIdSpecified = validateNodeId(request); + if (raftServerStatus == LEADER_AND_READY || request.getCmdType().equals(PrepareStatus)) { + return handler.handleReadRequest(request); + } else if (raftServerStatus == NOT_LEADER && isNodeIdSpecified && allowSnapshotReadFromFollower(request)) { return handler.handleReadRequest(request); } else { throw createLeaderErrorException(raftServerStatus); } } + + /** + * Check whether the OM request specified an OM node ID. + * @param omRequest OM request. + * @return true if the request specified an OM node ID and it is the same as the current OM, false otherwise. + * @throws ServiceException exception if there is a mismatch between the OM request's node ID and the + * current OM Node ID. + */ + private boolean validateNodeId(OMRequest omRequest) throws ServiceException { + if (!omRequest.hasOmNodeId()) { + return false; + } + + if (!omRequest.getOmNodeId().equals(ozoneManager.getOMNodeId())) { + throw new ServiceException(new OMNodeIdMismatchException("OM request node ID is " + omRequest.getOmNodeId() + + ", but the current OM node ID is " + ozoneManager.getOMNodeId())); + } + + return true; + } + + private boolean allowSnapshotReadFromFollower(OMRequest omRequest) { Review Comment: Currently the purpose is to support reading snapshots and keys under snapshots. We can change this once we support non-snapshot read from follower (e.g. https://github.com/apache/ozone/pull/5288). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@ozone.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@ozone.apache.org For additional commands, e-mail: issues-h...@ozone.apache.org