This is an automated email from the ASF dual-hosted git repository.
bharat pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 6752a71 HDDS-5317. BootStrapped SCM fails to bootstrap if it connects
to another bootstrapped SCM first. (#2312)
6752a71 is described below
commit 6752a71eb0bf3083f2b2026a507e0320b8a7f39c
Author: Bharat Viswanadham <[email protected]>
AuthorDate: Thu Jun 10 09:43:01 2021 +0530
HDDS-5317. BootStrapped SCM fails to bootstrap if it connects to another
bootstrapped SCM first. (#2312)
---
.../scm/ha/RetriableWithFailOverException.java | 31 ++++++++++++++++++++++
.../org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java | 2 ++
.../apache/hadoop/hdds/scm/ha/HASecurityUtils.java | 2 +-
.../org/apache/hadoop/hdds/scm/ha/RatisUtil.java | 22 +++++++++++++++
4 files changed, 56 insertions(+), 1 deletion(-)
diff --git
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/RetriableWithFailOverException.java
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/RetriableWithFailOverException.java
new file mode 100644
index 0000000..4eaea72
--- /dev/null
+++
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/RetriableWithFailOverException.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdds.scm.ha;
+
+import java.io.IOException;
+
+/**
+ * This exception indicates that the request can be retried, and the client
+ * needs to retry on the next server.
+ */
+public class RetriableWithFailOverException extends IOException {
+ public RetriableWithFailOverException(IOException exception) {
+ super(exception);
+ }
+}
diff --git
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java
index 0d182c0..7220d53 100644
---
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java
+++
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java
@@ -320,6 +320,8 @@ public final class SCMHAUtils {
} else if (SCMHAUtils.checkNonRetriableException(e)) {
return RetryPolicy.RetryAction.FAIL;
} else {
+ // For any other exception, such as RetriableWithFailOverException, we
+ // perform fail-over and retry.
if (failovers < maxRetryCount) {
return new RetryPolicy.RetryAction(
RetryPolicy.RetryAction.RetryDecision.FAILOVER_AND_RETRY,
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java
index 1d2b36e..a314057 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java
@@ -357,7 +357,7 @@ public final class HASecurityUtils {
.setLeaderId(null)
.setProperties(properties)
.setRetryPolicy(
- RetryPolicies.retryUpToMaximumCountWithFixedSleep(15,
+ RetryPolicies.retryUpToMaximumCountWithFixedSleep(120,
TimeDuration.valueOf(500, TimeUnit.MILLISECONDS)));
if (tlsConfig != null) {
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java
index 16394e6..63545dd 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java
@@ -22,6 +22,7 @@ import com.google.protobuf.ServiceException;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.ratis.ServerNotLeaderException;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
+import org.apache.hadoop.hdds.security.exception.SCMSecurityException;
import org.apache.hadoop.hdds.server.ServerUtils;
import org.apache.ratis.RaftConfigKeys;
import org.apache.ratis.conf.RaftProperties;
@@ -37,6 +38,10 @@ import java.io.IOException;
import java.util.Collections;
import java.util.concurrent.TimeUnit;
+import static
org.apache.hadoop.hdds.security.exception.SCMSecurityException.ErrorCode.GET_DN_CERTIFICATE_FAILED;
+import static
org.apache.hadoop.hdds.security.exception.SCMSecurityException.ErrorCode.GET_OM_CERTIFICATE_FAILED;
+import static
org.apache.hadoop.hdds.security.exception.SCMSecurityException.ErrorCode.GET_SCM_CERTIFICATE_FAILED;
+import static
org.apache.hadoop.hdds.security.exception.SCMSecurityException.ErrorCode.NOT_A_PRIMARY_SCM;
import static org.apache.ratis.server.RaftServerConfigKeys.Log;
import static org.apache.ratis.server.RaftServerConfigKeys.RetryCache;
import static org.apache.ratis.server.RaftServerConfigKeys.Rpc;
@@ -190,6 +195,23 @@ public final class RatisUtil {
throw new ServiceException(ServerNotLeaderException
.convertToNotLeaderException(nle,
SCMRatisServerImpl.getSelfPeerId(scmId), port));
+ } else if (e instanceof SCMSecurityException) {
+ // For NOT_A_PRIMARY_SCM error the client needs to retry on the next SCM.
+ // GetSCMCertificate call can happen on a non-leader SCM, but only on a
+ // primary SCM. When the bootstrapped SCM connects to another
+ // bootstrapped SCM we get NOT_A_PRIMARY_SCM. In this scenario the
+ // client needs to retry on the next SCM.
+
+ // Also, if the call failed on the primary/leader SCM for any other
+ // reason, retry again.
+ SCMSecurityException ex = (SCMSecurityException) e;
+ if (ex.getErrorCode().equals(NOT_A_PRIMARY_SCM)) {
+ throw new ServiceException(new RetriableWithFailOverException(e));
+ } else if (ex.getErrorCode().equals(GET_SCM_CERTIFICATE_FAILED) ||
+ ex.getErrorCode().equals(GET_OM_CERTIFICATE_FAILED) ||
+ ex.getErrorCode().equals(GET_DN_CERTIFICATE_FAILED)) {
+ throw new ServiceException(new RetriableWithNoFailoverException(e));
+ }
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]