fapifta commented on code in PR #6716: URL: https://github.com/apache/ozone/pull/6716#discussion_r1621521528
########## hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/ratis/BaseRatisCommand.java: ########## @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.admin.ratis; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.ratis.RatisHelper; +import org.apache.hadoop.hdds.scm.client.ClientTrustManager; +import org.apache.hadoop.hdds.security.SecurityConfig; +import org.apache.hadoop.hdds.security.x509.certificate.client.CACertificateProvider; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.client.OzoneClientFactory; +import org.apache.hadoop.ozone.om.helpers.ServiceInfoEx; +import org.apache.ratis.client.RaftClient; +import org.apache.ratis.grpc.GrpcTlsConfig; +import org.apache.ratis.proto.RaftProtos.FollowerInfoProto; +import org.apache.ratis.proto.RaftProtos.RaftPeerProto; +import org.apache.ratis.proto.RaftProtos.RaftPeerRole; +import org.apache.ratis.proto.RaftProtos.RoleInfoProto; +import org.apache.ratis.protocol.GroupInfoReply; +import org.apache.ratis.protocol.RaftGroup; +import org.apache.ratis.protocol.RaftGroupId; +import org.apache.ratis.protocol.RaftPeer; +import org.apache.ratis.rpc.SupportedRpcType; + +import java.util.List; + +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_GRPC_TLS_ENABLED; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_GRPC_TLS_ENABLED_DEFAULT; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SECURITY_ENABLED_DEFAULT; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SECURITY_ENABLED_KEY; +import static org.apache.ratis.shell.cli.RaftUtils.buildRaftGroupIdFromStr; +import static org.apache.ratis.shell.cli.RaftUtils.buildRaftPeersFromStr; +import static org.apache.ratis.shell.cli.RaftUtils.retrieveGroupInfoByGroupId; +import static org.apache.ratis.shell.cli.RaftUtils.retrieveRemoteGroupId; + +/** + * The base class for all the ratis command. + */ +public class BaseRatisCommand { + private RaftGroup raftGroup; + private GroupInfoReply groupInfoReply; + private OzoneConfiguration config; + private String omServiceId; + + public void run(String peers, String groupid, String omServiceId) throws Exception { + this.config = new OzoneConfiguration(); + this.omServiceId = omServiceId; + List<RaftPeer> peerList = buildRaftPeersFromStr(peers); + RaftGroupId raftGroupIdFromConfig = buildRaftGroupIdFromStr(groupid); + raftGroup = RaftGroup.valueOf(raftGroupIdFromConfig, peerList); + + try (RaftClient client = buildRaftClient(raftGroup)) { + RaftGroupId remoteGroupId = retrieveRemoteGroupId(raftGroupIdFromConfig, peerList, client, System.out); + groupInfoReply = retrieveGroupInfoByGroupId(remoteGroupId, peerList, client, System.out); + raftGroup = groupInfoReply.getGroup(); + } + + } + + protected RaftClient buildRaftClient(RaftGroup raftGroup) throws Exception { + GrpcTlsConfig tlsConfig = null; + if (config.getBoolean(OZONE_SECURITY_ENABLED_KEY, OZONE_SECURITY_ENABLED_DEFAULT) && + config.getBoolean(HDDS_GRPC_TLS_ENABLED, HDDS_GRPC_TLS_ENABLED_DEFAULT)) { + tlsConfig = createGrpcTlsConf(omServiceId); + } + + return RatisHelper.newRaftClient( + SupportedRpcType.GRPC, null, raftGroup, + RatisHelper.createRetryPolicy(config), tlsConfig, config); + } + + public RaftGroup getRaftGroup() { + return raftGroup; + } + + public GroupInfoReply getGroupInfoReply() { + return groupInfoReply; + } + + /** + * Get the leader id. + * + * @param roleInfo the role info + * @return the leader id + */ + protected RaftPeerProto getLeader(RoleInfoProto roleInfo) { + if (roleInfo == null) { + return null; + } + if (roleInfo.getRole() == RaftPeerRole.LEADER) { + return roleInfo.getSelf(); + } + FollowerInfoProto followerInfo = roleInfo.getFollowerInfo(); + if (followerInfo == null) { + return null; + } + return followerInfo.getLeaderInfo().getId(); + } + + private GrpcTlsConfig createGrpcTlsConf(String omServiceId) throws Exception { + OzoneClient certClient = OzoneClientFactory.getRpcClient(omServiceId, + config); + + ServiceInfoEx serviceInfoEx = certClient + .getObjectStore() + .getClientProxy() + .getOzoneManagerClient() + .getServiceInfo(); + + CACertificateProvider remoteCAProvider = + serviceInfoEx::provideCACerts; + ClientTrustManager trustManager = new ClientTrustManager(remoteCAProvider, serviceInfoEx); + + return RatisHelper.createTlsClientConfig(new SecurityConfig(config), trustManager); Review Comment: At this point we also have a potential problem... As I understand this PR, the goal is to provide CLI commands that an admin can run and via which the admin can issue commands to a Ratis Group directly from the CLI. If this understanding of mine is correct, then we may run into a problem with mutual TLS authentication that happens between the raft peers, because the createTlsClientConfig call creates a client side config that does not apply mTLS because the client does not have its own certificate that our internal PKI system trusts, and also it does not have a way to acquire such a certificate from the internal PKI system. So unless the client here talks to a ratis client port that does not require mTLS but uses other authentication mechanisms like Kerberos, then this approach ain't gonna fly and honestly in this case we need to figure out if we can and how we can allow the client to get its own certificate to use mTLS with the raft server side. (Also we might need to offload this from SCM if these calls are something clients would require potentially often, as we can not afford to generate a lot of certificate on the SCM side as it is a costly operation.) Not to mention the complexity of ensuring that only valid users can grab a cert from the system. OTOH, if these raft servers do not use mTLS then how they authenticate and identify each others? ########## hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/ratis/BaseRatisCommand.java: ########## @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.admin.ratis; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.ratis.RatisHelper; +import org.apache.hadoop.hdds.scm.client.ClientTrustManager; +import org.apache.hadoop.hdds.security.SecurityConfig; +import org.apache.hadoop.hdds.security.x509.certificate.client.CACertificateProvider; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.client.OzoneClientFactory; +import org.apache.hadoop.ozone.om.helpers.ServiceInfoEx; +import org.apache.ratis.client.RaftClient; +import org.apache.ratis.grpc.GrpcTlsConfig; +import org.apache.ratis.proto.RaftProtos.FollowerInfoProto; +import org.apache.ratis.proto.RaftProtos.RaftPeerProto; +import org.apache.ratis.proto.RaftProtos.RaftPeerRole; +import org.apache.ratis.proto.RaftProtos.RoleInfoProto; +import org.apache.ratis.protocol.GroupInfoReply; +import org.apache.ratis.protocol.RaftGroup; +import org.apache.ratis.protocol.RaftGroupId; +import org.apache.ratis.protocol.RaftPeer; +import org.apache.ratis.rpc.SupportedRpcType; + +import java.util.List; + +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_GRPC_TLS_ENABLED; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_GRPC_TLS_ENABLED_DEFAULT; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SECURITY_ENABLED_DEFAULT; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SECURITY_ENABLED_KEY; +import static org.apache.ratis.shell.cli.RaftUtils.buildRaftGroupIdFromStr; +import static org.apache.ratis.shell.cli.RaftUtils.buildRaftPeersFromStr; +import static org.apache.ratis.shell.cli.RaftUtils.retrieveGroupInfoByGroupId; +import static org.apache.ratis.shell.cli.RaftUtils.retrieveRemoteGroupId; + +/** + * The base class for all the ratis command. + */ +public class BaseRatisCommand { + private RaftGroup raftGroup; + private GroupInfoReply groupInfoReply; + private OzoneConfiguration config; + private String omServiceId; + + public void run(String peers, String groupid, String omServiceId) throws Exception { + this.config = new OzoneConfiguration(); + this.omServiceId = omServiceId; + List<RaftPeer> peerList = buildRaftPeersFromStr(peers); + RaftGroupId raftGroupIdFromConfig = buildRaftGroupIdFromStr(groupid); + raftGroup = RaftGroup.valueOf(raftGroupIdFromConfig, peerList); + + try (RaftClient client = buildRaftClient(raftGroup)) { + RaftGroupId remoteGroupId = retrieveRemoteGroupId(raftGroupIdFromConfig, peerList, client, System.out); + groupInfoReply = retrieveGroupInfoByGroupId(remoteGroupId, peerList, client, System.out); + raftGroup = groupInfoReply.getGroup(); + } + + } + + protected RaftClient buildRaftClient(RaftGroup raftGroup) throws Exception { + GrpcTlsConfig tlsConfig = null; + if (config.getBoolean(OZONE_SECURITY_ENABLED_KEY, OZONE_SECURITY_ENABLED_DEFAULT) && + config.getBoolean(HDDS_GRPC_TLS_ENABLED, HDDS_GRPC_TLS_ENABLED_DEFAULT)) { + tlsConfig = createGrpcTlsConf(omServiceId); + } + + return RatisHelper.newRaftClient( + SupportedRpcType.GRPC, null, raftGroup, + RatisHelper.createRetryPolicy(config), tlsConfig, config); + } + + public RaftGroup getRaftGroup() { + return raftGroup; + } + + public GroupInfoReply getGroupInfoReply() { + return groupInfoReply; + } + + /** + * Get the leader id. + * + * @param roleInfo the role info + * @return the leader id + */ + protected RaftPeerProto getLeader(RoleInfoProto roleInfo) { + if (roleInfo == null) { + return null; + } + if (roleInfo.getRole() == RaftPeerRole.LEADER) { + return roleInfo.getSelf(); + } + FollowerInfoProto followerInfo = roleInfo.getFollowerInfo(); + if (followerInfo == null) { + return null; + } + return followerInfo.getLeaderInfo().getId(); + } + + private GrpcTlsConfig createGrpcTlsConf(String omServiceId) throws Exception { + OzoneClient certClient = OzoneClientFactory.getRpcClient(omServiceId, + config); + + ServiceInfoEx serviceInfoEx = certClient + .getObjectStore() + .getClientProxy() + .getOzoneManagerClient() + .getServiceInfo(); + + CACertificateProvider remoteCAProvider = Review Comment: I am not sure if this remoteProvider definition is intentional or is a misunderstanding of the ClientTrustManager's intents, but let me provide the intent based on which I hope you can decide, and course correct if needed. The ClientTrustManager is designed in this way to ensure we can give the long running clients a possibility to renew their knowledge of the Ozone cluster's internal rootCA certificate that the client side uses to verify the identity of DataNodes in case of a rootCA rotation that happens some time before the rootCA actually expires, and is a long process by default during which there might be multiple rootCA certificates playing the role of the root of trust (the old one and the new one), after which period the new rootCA becomes the only valid one (but that does not affect the client side which can run further with the two together at the moment). The current way of initializing the ClientTrustManager here, leads to a state where this renew does not happen, as both the remote and the in-memory provider is the same (the remote basically delegates to the in-memory instance via a lambda expression). This way of operation in the ClientTrustManager can be achieved by specifying the remote provider to null, as in both cases no additional RPC call would be done even if there is verification problem with the certificate (the new DN cert is signed by a different CA). If this is a short running client which it is (as it runs one or more RPC calls in a short period of time and exits). If we can assume that these RPC calls are idempotent, then we don't need the remote provider, as in case of a rare event when two or more RPCs fall to the different sides on a rootCA rotation in time, then the whole thing can be done once more and it will not fail with a certificate issue the second time. If this would be a long running client that issues multiple RPCs over a longer period of time, then it does worth it to provide a remote provider other than null, and in this case the best a remote provider can do to refresh the root of trust is to call OM's getServiceInfo method via an RPC, and provide the root(s) of trust based on the result of that RPC call. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
