fapifta commented on code in PR #6716:
URL: https://github.com/apache/ozone/pull/6716#discussion_r1621521528


##########
hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/ratis/BaseRatisCommand.java:
##########
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.admin.ratis;
+
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.ratis.RatisHelper;
+import org.apache.hadoop.hdds.scm.client.ClientTrustManager;
+import org.apache.hadoop.hdds.security.SecurityConfig;
+import 
org.apache.hadoop.hdds.security.x509.certificate.client.CACertificateProvider;
+import org.apache.hadoop.ozone.client.OzoneClient;
+import org.apache.hadoop.ozone.client.OzoneClientFactory;
+import org.apache.hadoop.ozone.om.helpers.ServiceInfoEx;
+import org.apache.ratis.client.RaftClient;
+import org.apache.ratis.grpc.GrpcTlsConfig;
+import org.apache.ratis.proto.RaftProtos.FollowerInfoProto;
+import org.apache.ratis.proto.RaftProtos.RaftPeerProto;
+import org.apache.ratis.proto.RaftProtos.RaftPeerRole;
+import org.apache.ratis.proto.RaftProtos.RoleInfoProto;
+import org.apache.ratis.protocol.GroupInfoReply;
+import org.apache.ratis.protocol.RaftGroup;
+import org.apache.ratis.protocol.RaftGroupId;
+import org.apache.ratis.protocol.RaftPeer;
+import org.apache.ratis.rpc.SupportedRpcType;
+
+import java.util.List;
+
+import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_GRPC_TLS_ENABLED;
+import static 
org.apache.hadoop.hdds.HddsConfigKeys.HDDS_GRPC_TLS_ENABLED_DEFAULT;
+import static 
org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SECURITY_ENABLED_DEFAULT;
+import static 
org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SECURITY_ENABLED_KEY;
+import static org.apache.ratis.shell.cli.RaftUtils.buildRaftGroupIdFromStr;
+import static org.apache.ratis.shell.cli.RaftUtils.buildRaftPeersFromStr;
+import static org.apache.ratis.shell.cli.RaftUtils.retrieveGroupInfoByGroupId;
+import static org.apache.ratis.shell.cli.RaftUtils.retrieveRemoteGroupId;
+
+/**
+ * The base class for all the ratis command.
+ */
+public class BaseRatisCommand {
+  private RaftGroup raftGroup;
+  private GroupInfoReply groupInfoReply;
+  private OzoneConfiguration config;
+  private String omServiceId;
+
+  public void run(String peers, String groupid, String omServiceId) throws 
Exception {
+    this.config = new OzoneConfiguration();
+    this.omServiceId = omServiceId;
+    List<RaftPeer> peerList = buildRaftPeersFromStr(peers);
+    RaftGroupId raftGroupIdFromConfig = buildRaftGroupIdFromStr(groupid);
+    raftGroup = RaftGroup.valueOf(raftGroupIdFromConfig, peerList);
+
+    try (RaftClient client = buildRaftClient(raftGroup)) {
+      RaftGroupId remoteGroupId = retrieveRemoteGroupId(raftGroupIdFromConfig, 
peerList, client, System.out);
+      groupInfoReply = retrieveGroupInfoByGroupId(remoteGroupId, peerList, 
client, System.out);
+      raftGroup = groupInfoReply.getGroup();
+    }
+
+  }
+
+  protected RaftClient buildRaftClient(RaftGroup raftGroup) throws Exception {
+    GrpcTlsConfig tlsConfig = null;
+    if (config.getBoolean(OZONE_SECURITY_ENABLED_KEY, 
OZONE_SECURITY_ENABLED_DEFAULT) &&
+        config.getBoolean(HDDS_GRPC_TLS_ENABLED, 
HDDS_GRPC_TLS_ENABLED_DEFAULT)) {
+      tlsConfig = createGrpcTlsConf(omServiceId);
+    }
+
+    return RatisHelper.newRaftClient(
+        SupportedRpcType.GRPC, null, raftGroup,
+        RatisHelper.createRetryPolicy(config), tlsConfig, config);
+  }
+
+  public RaftGroup getRaftGroup() {
+    return raftGroup;
+  }
+
+  public GroupInfoReply getGroupInfoReply() {
+    return groupInfoReply;
+  }
+
+  /**
+   * Get the leader id.
+   *
+   * @param roleInfo the role info
+   * @return the leader id
+   */
+  protected RaftPeerProto getLeader(RoleInfoProto roleInfo) {
+    if (roleInfo == null) {
+      return null;
+    }
+    if (roleInfo.getRole() == RaftPeerRole.LEADER) {
+      return roleInfo.getSelf();
+    }
+    FollowerInfoProto followerInfo = roleInfo.getFollowerInfo();
+    if (followerInfo == null) {
+      return null;
+    }
+    return followerInfo.getLeaderInfo().getId();
+  }
+
+  private GrpcTlsConfig createGrpcTlsConf(String omServiceId) throws Exception 
{
+    OzoneClient certClient = OzoneClientFactory.getRpcClient(omServiceId,
+        config);
+
+    ServiceInfoEx serviceInfoEx = certClient
+        .getObjectStore()
+        .getClientProxy()
+        .getOzoneManagerClient()
+        .getServiceInfo();
+
+    CACertificateProvider remoteCAProvider =
+        serviceInfoEx::provideCACerts;
+    ClientTrustManager trustManager = new ClientTrustManager(remoteCAProvider, 
serviceInfoEx);
+
+    return RatisHelper.createTlsClientConfig(new SecurityConfig(config), 
trustManager);

Review Comment:
   At this point we also have a potential problem...
   
   As I understand this PR, the goal is to provide CLI commands that an admin 
can run and via which the admin can issue commands to a Ratis Group directly 
from the CLI.
   
   If this understanding of mine is correct, then we may run into a problem 
with mutual TLS authentication that happens between the raft peers, because the 
createTlsClientConfig call creates a client side config that does not apply 
mTLS because the client does not have its own certificate that our internal PKI 
system trusts, and also it does not have a way to acquire such a certificate 
from the internal PKI system.
   
   So unless the client here talks to a ratis client port that does not require 
mTLS but uses other authentication mechanisms like Kerberos, then this approach 
ain't gonna fly and honestly in this case we need to figure out if we can and 
how we can allow the client to get its own certificate to use mTLS with the 
raft server side. (Also we might need to offload this from SCM if these calls 
are something clients would require potentially often, as we can not afford to 
generate a lot of certificate on the SCM side as it is a costly operation.) Not 
to mention the complexity of ensuring that only valid users can grab a cert 
from the system.
   OTOH, if these raft servers do not use mTLS then how they authenticate and 
identify each others?



##########
hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/ratis/BaseRatisCommand.java:
##########
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.admin.ratis;
+
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.ratis.RatisHelper;
+import org.apache.hadoop.hdds.scm.client.ClientTrustManager;
+import org.apache.hadoop.hdds.security.SecurityConfig;
+import 
org.apache.hadoop.hdds.security.x509.certificate.client.CACertificateProvider;
+import org.apache.hadoop.ozone.client.OzoneClient;
+import org.apache.hadoop.ozone.client.OzoneClientFactory;
+import org.apache.hadoop.ozone.om.helpers.ServiceInfoEx;
+import org.apache.ratis.client.RaftClient;
+import org.apache.ratis.grpc.GrpcTlsConfig;
+import org.apache.ratis.proto.RaftProtos.FollowerInfoProto;
+import org.apache.ratis.proto.RaftProtos.RaftPeerProto;
+import org.apache.ratis.proto.RaftProtos.RaftPeerRole;
+import org.apache.ratis.proto.RaftProtos.RoleInfoProto;
+import org.apache.ratis.protocol.GroupInfoReply;
+import org.apache.ratis.protocol.RaftGroup;
+import org.apache.ratis.protocol.RaftGroupId;
+import org.apache.ratis.protocol.RaftPeer;
+import org.apache.ratis.rpc.SupportedRpcType;
+
+import java.util.List;
+
+import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_GRPC_TLS_ENABLED;
+import static 
org.apache.hadoop.hdds.HddsConfigKeys.HDDS_GRPC_TLS_ENABLED_DEFAULT;
+import static 
org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SECURITY_ENABLED_DEFAULT;
+import static 
org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SECURITY_ENABLED_KEY;
+import static org.apache.ratis.shell.cli.RaftUtils.buildRaftGroupIdFromStr;
+import static org.apache.ratis.shell.cli.RaftUtils.buildRaftPeersFromStr;
+import static org.apache.ratis.shell.cli.RaftUtils.retrieveGroupInfoByGroupId;
+import static org.apache.ratis.shell.cli.RaftUtils.retrieveRemoteGroupId;
+
+/**
+ * The base class for all the ratis command.
+ */
+public class BaseRatisCommand {
+  private RaftGroup raftGroup;
+  private GroupInfoReply groupInfoReply;
+  private OzoneConfiguration config;
+  private String omServiceId;
+
+  public void run(String peers, String groupid, String omServiceId) throws 
Exception {
+    this.config = new OzoneConfiguration();
+    this.omServiceId = omServiceId;
+    List<RaftPeer> peerList = buildRaftPeersFromStr(peers);
+    RaftGroupId raftGroupIdFromConfig = buildRaftGroupIdFromStr(groupid);
+    raftGroup = RaftGroup.valueOf(raftGroupIdFromConfig, peerList);
+
+    try (RaftClient client = buildRaftClient(raftGroup)) {
+      RaftGroupId remoteGroupId = retrieveRemoteGroupId(raftGroupIdFromConfig, 
peerList, client, System.out);
+      groupInfoReply = retrieveGroupInfoByGroupId(remoteGroupId, peerList, 
client, System.out);
+      raftGroup = groupInfoReply.getGroup();
+    }
+
+  }
+
+  protected RaftClient buildRaftClient(RaftGroup raftGroup) throws Exception {
+    GrpcTlsConfig tlsConfig = null;
+    if (config.getBoolean(OZONE_SECURITY_ENABLED_KEY, 
OZONE_SECURITY_ENABLED_DEFAULT) &&
+        config.getBoolean(HDDS_GRPC_TLS_ENABLED, 
HDDS_GRPC_TLS_ENABLED_DEFAULT)) {
+      tlsConfig = createGrpcTlsConf(omServiceId);
+    }
+
+    return RatisHelper.newRaftClient(
+        SupportedRpcType.GRPC, null, raftGroup,
+        RatisHelper.createRetryPolicy(config), tlsConfig, config);
+  }
+
+  public RaftGroup getRaftGroup() {
+    return raftGroup;
+  }
+
+  public GroupInfoReply getGroupInfoReply() {
+    return groupInfoReply;
+  }
+
+  /**
+   * Get the leader id.
+   *
+   * @param roleInfo the role info
+   * @return the leader id
+   */
+  protected RaftPeerProto getLeader(RoleInfoProto roleInfo) {
+    if (roleInfo == null) {
+      return null;
+    }
+    if (roleInfo.getRole() == RaftPeerRole.LEADER) {
+      return roleInfo.getSelf();
+    }
+    FollowerInfoProto followerInfo = roleInfo.getFollowerInfo();
+    if (followerInfo == null) {
+      return null;
+    }
+    return followerInfo.getLeaderInfo().getId();
+  }
+
+  private GrpcTlsConfig createGrpcTlsConf(String omServiceId) throws Exception 
{
+    OzoneClient certClient = OzoneClientFactory.getRpcClient(omServiceId,
+        config);
+
+    ServiceInfoEx serviceInfoEx = certClient
+        .getObjectStore()
+        .getClientProxy()
+        .getOzoneManagerClient()
+        .getServiceInfo();
+
+    CACertificateProvider remoteCAProvider =

Review Comment:
   I am not sure if this remoteProvider definition is intentional or is a 
misunderstanding of the ClientTrustManager's intents, but let me provide the 
intent based on which I hope you can decide, and course correct if needed.
   
   The ClientTrustManager is designed in this way to ensure we can give the 
long running clients a possibility to renew their knowledge of the Ozone 
cluster's internal rootCA certificate that the client side uses to verify the 
identity of DataNodes in case of a rootCA rotation that happens some time 
before the rootCA actually expires, and is a long process by default during 
which there might be multiple rootCA certificates playing the role of the root 
of trust (the old one and the new one), after which period the new rootCA 
becomes the only valid one (but that does not affect the client side which can 
run further with the two together at the moment).
   
   The current way of initializing the ClientTrustManager here, leads to a 
state where this renew does not happen, as both the remote and the in-memory 
provider is the same (the remote basically delegates to the in-memory instance 
via a lambda expression). This way of operation in the ClientTrustManager can 
be achieved by specifying the remote provider to null, as in both cases no 
additional RPC call would be done even if there is verification problem with 
the certificate (the new DN cert is signed by a different CA).
   
   If this is a short running client which it is (as it runs one or more RPC 
calls in a short period of time and exits). If we can assume that these RPC 
calls are idempotent, then we don't need the remote provider, as in case of a 
rare event when two or more RPCs fall to the different sides on a rootCA 
rotation in time, then the whole thing can be done once more and it will not 
fail with a certificate issue the second time.
   
   If this would be a long running client that issues multiple RPCs over a 
longer period of time, then it does worth it to provide a remote provider other 
than null, and in this case the best a remote provider can do to refresh the 
root of trust is to call OM's getServiceInfo method via an RPC, and provide the 
root(s) of trust based on the result of that RPC call.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to