fapifta commented on code in PR #4943:
URL: https://github.com/apache/ozone/pull/4943#discussion_r1247588593
##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/security/RootCARotationManager.java:
##########
@@ -255,11 +489,210 @@ public Duration
timeBefore2ExpiryGracePeriod(X509Certificate certificate) {
}
}
+ /**
+ * Task to generate sub-ca key and certificate.
+ */
+ public class SubCARotationPrepareTask implements Runnable {
+ private String rootCACertId;
+
+ public SubCARotationPrepareTask(String newRootCertId) {
+ this.rootCACertId = newRootCertId;
+ }
+
+ @Override
+ public void run() {
+ // Lock to protect the sub CA certificate rotation preparation process,
+ // to make sure there is only one task is ongoing at one time.
+ // Sub CA rotation preparation steps:
+ // 1. generate new sub CA keys
+ // 2. send CSR to leader SCM
+ // 3. wait CSR response and persist the certificate to disk
+ try {
+ acquireLock();
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ return;
+ }
+ try {
+ LOG.info("SubCARotationPrepareTask[rootCertId = {}] - started.",
+ rootCACertId);
+
+ SecurityConfig securityConfig =
+ scmCertClient.getSecurityConfig();
+ String progressComponent = SCMCertificateClient.COMPONENT_NAME +
+ HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX +
+ HDDS_NEW_KEY_CERT_DIR_NAME_PROGRESS_SUFFIX;
+ final String newSubCAProgressPath =
+ securityConfig.getLocation(progressComponent).toString();
+ final String newSubCAPath = securityConfig.getLocation(
+ SCMCertificateClient.COMPONENT_NAME +
+ HDDS_NEW_KEY_CERT_DIR_NAME_SUFFIX).toString();
+
+ File newProgressDir = new File(newSubCAProgressPath);
+ File newDir = new File(newSubCAPath);
+ try {
+ FileUtils.deleteDirectory(newProgressDir);
+ FileUtils.deleteDirectory(newDir);
+ Files.createDirectories(newProgressDir.toPath());
+ } catch (IOException e) {
+ LOG.error("Failed to delete and create {}, or delete {}",
+ newProgressDir, newDir, e);
+ String message = "Terminate SCM, encounter IO exception(" +
+ e.getMessage() + ") when deleting and create directory";
+ scm.shutDown(message);
+ }
+
+ // Generate key
+ Path keyDir = securityConfig.getKeyLocation(progressComponent);
+ KeyCodec keyCodec = new KeyCodec(securityConfig, keyDir);
+ KeyPair newKeyPair = null;
+ try {
+ HDDSKeyGenerator keyGenerator = new HDDSKeyGenerator(securityConfig);
+ newKeyPair = keyGenerator.generateKey();
+ keyCodec.writePublicKey(newKeyPair.getPublic());
+ keyCodec.writePrivateKey(newKeyPair.getPrivate());
+ LOG.info("SubCARotationPrepareTask[rootCertId = {}] - " +
+ "scm key generated.", rootCACertId);
+ } catch (Exception e) {
+ LOG.error("Failed to generate key under {}", newProgressDir, e);
+ String message = "Terminate SCM, encounter exception(" +
+ e.getMessage() + ") when generating new key under " +
+ newProgressDir;
+ scm.shutDown(message);
+ }
+
+ // Get certificate signed
+ String newCertSerialId = "";
+ try {
+ CertificateSignRequest.Builder csrBuilder =
+ scmCertClient.getCSRBuilder();
+ csrBuilder.setKey(newKeyPair);
+ newCertSerialId = scmCertClient.signAndStoreCertificate(
+ csrBuilder.build(),
+ Paths.get(newSubCAProgressPath, HDDS_X509_DIR_NAME_DEFAULT),
+ true);
+ LOG.info("SubCARotationPrepareTask[rootCertId = {}] - " +
+ "scm certificate {} signed.", rootCACertId, newCertSerialId);
+ } catch (Exception e) {
+ LOG.error("Failed to generate certificate under {}",
+ newProgressDir, e);
+ String message = "Terminate SCM, encounter exception(" +
+ e.getMessage() + ") when generating new certificate " +
+ newProgressDir;
+ scm.shutDown(message);
+ }
+
+ // move dir from *-next-progress to *-next
+ try {
+ Files.move(newProgressDir.toPath(), newDir.toPath(),
+ StandardCopyOption.ATOMIC_MOVE,
+ StandardCopyOption.REPLACE_EXISTING);
+ } catch (IOException e) {
+ LOG.error("Failed to move {} to {}",
+ newSubCAProgressPath, newSubCAPath, e);
+ String message = "Terminate SCM, encounter exception(" +
+ e.getMessage() + ") when moving " + newSubCAProgressPath +
+ " to " + newSubCAPath;
+ scm.shutDown(message);
+ }
+
+ // Send ack to rotationPrepare request
+ try {
+ handler.rotationPrepareAck(rootCACertId, newCertSerialId,
+ scm.getScmId());
+ LOG.info("SubCARotationPrepareTask[rootCertId = {}] - " +
+ "rotation prepare ack sent out, new scm certificate {}",
+ rootCACertId, newCertSerialId);
+ } catch (Exception e) {
+ LOG.error("Failed to send ack to rotationPrepare request", e);
+ String message = "Terminate SCM, encounter exception(" +
+ e.getMessage() + ") when sending out rotationPrepare ack";
+ scm.shutDown(message);
+ }
+
+ handler.setSubCACertId(newCertSerialId);
+
+ releaseLockOnTimeoutTask = executorService.schedule(() -> {
+ // If no rotation commit request received after rotation prepare
+ LOG.warn("Failed to have enough rotation acks from SCM. This " +
+ " time root rotation {} is failed. Release the lock.",
+ rootCACertId);
+ releaseLock();
+ }, ackTimeout.toMillis(), TimeUnit.MILLISECONDS);
+
+ } catch (Throwable e) {
+ LOG.error("Unexpected error happen", e);
+ scm.shutDown("Unexpected error happen, " + e.getMessage());
+ }
+ }
+ }
+
+ /**
+ * Task to wait the all acks of prepare request.
+ */
+ public class WaitSubCARotationPrepareAckTask implements Runnable {
+ private String rootCACertId;
+
+ public WaitSubCARotationPrepareAckTask(
+ X509CertificateHolder rootCertHolder) {
+ this.rootCACertId = rootCertHolder.getSerialNumber().toString();
+ }
+
+ @Override
+ public void run() {
+ if (!isRunning()) {
+ LOG.info("SCM is not leader anymore. Delete the in-progress " +
+ "root CA directory");
+ cleanupAndStop("SCM is not leader anymore");
+ return;
+ }
+ synchronized (RootCARotationManager.class) {
+ int numFromHADetails =
+ scm.getSCMHANodeDetails().getPeerNodeDetails().size() + 1;
+ int numFromRatisServer = scm.getScmHAManager().getRatisServer()
+ .getDivision().getRaftConf().getCurrentPeers().size();
+ LOG.info("numFromHADetails {}, numFromRatisServer {}",
+ numFromHADetails, numFromRatisServer);
+ if (handler.rotationPrepareAcks() == numFromRatisServer) {
+ // all acks are received.
+ try {
+ waitAckTimeoutTask.cancel(false);
+ handler.rotationCommit(rootCACertId);
+ handler.rotationCommitted(rootCACertId);
+
+ metrics.incrSuccessRotationNum();
+ long timeTaken = System.nanoTime() - processStartTime.get();
+ metrics.setSuccessTimeInNs(timeTaken);
+ processStartTime.set(null);
+
+ // reset state
+ handler.resetRotationPrepareAcks();
+ String msg = "Root certificate " + rootCACertId +
+ " rotation is finished successfully after " + timeTaken + "
ns";
+ cleanupAndStop(msg);
+ } catch (Throwable e) {
+ LOG.error("Execution error", e);
+ handler.resetRotationPrepareAcks();
+ cleanupAndStop("Execution error, " + e.getMessage());
+ } finally {
+ // stop this task to re-execute again in next cycle
+ throw new RuntimeException("Exit the this " +
+ "WaitSubCARotationPrepareAckTask for root certificate " +
+ rootCACertId + " since the rotation is finished execution");
Review Comment:
If I remember well, we want to stop scheduling this task again...
I think we can take the reference of the Future once the task is submitted,
and cancel via that one instead.
Nice catch @nandakumar131 we have talked about this with Sammi, I just
missed this one from my notes as it seems. Similarly as we cancel the
waitAckTimeoutTask.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]