Hi team, The attached patches implement key retrieval retry with backoff (ticket: https://fedorahosted.org/pki/ticket/2293).
Thanks, Fraser
From 93db05ea3cb892be0d19c65fdf7dc9be6759a651 Mon Sep 17 00:00:00 2001 From: Fraser Tweedale <[email protected]> Date: Tue, 31 May 2016 21:38:37 +1000 Subject: [PATCH 117/119] Limit key retrieval to a single thread per CA Before implementing lightweight CA key retrieval retry with exponential backoff, ensure that only one key retriever thread can execute at a time, for each CA. Also make SigningUnit initialisation (initSigUnit) synchronised. Part of: https://fedorahosted.org/pki/ticket/2293 --- .../src/com/netscape/ca/CertificateAuthority.java | 28 +++++++++++++++++----- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/base/ca/src/com/netscape/ca/CertificateAuthority.java b/base/ca/src/com/netscape/ca/CertificateAuthority.java index 5b2f382c29a716f3e72695b7da5406bb85b34845..c2a7d0c907b4dd5774b22cfbb404194da162a535 100644 --- a/base/ca/src/com/netscape/ca/CertificateAuthority.java +++ b/base/ca/src/com/netscape/ca/CertificateAuthority.java @@ -204,6 +204,8 @@ public class CertificateAuthority private static final Map<AuthorityID, ICertificateAuthority> caMap = Collections.synchronizedSortedMap(new TreeMap<AuthorityID, ICertificateAuthority>()); + private static final Map<AuthorityID, Thread> keyRetrieverThreads = + Collections.synchronizedSortedMap(new TreeMap<AuthorityID, Thread>()); protected CertificateAuthority hostCA = null; protected AuthorityID authorityID = null; protected AuthorityID authorityParentID = null; @@ -1460,7 +1462,7 @@ public class CertificateAuthority /** * init CA signing unit & cert chain. */ - private boolean initSigUnit(boolean retrieveKeys) + private synchronized boolean initSigUnit(boolean retrieveKeys) throws EBaseException { try { // init signing unit @@ -1491,11 +1493,16 @@ public class CertificateAuthority CMS.debug("CA signing key and cert not (yet) present in NSSDB"); signingUnitException = e; if (retrieveKeys == true) { - CMS.debug("Starting KeyRetrieverRunner thread"); - new Thread( - new KeyRetrieverRunner(this), - "KeyRetrieverRunner-" + authorityID - ).start(); + if (!keyRetrieverThreads.containsKey(authorityID)) { + CMS.debug("Starting KeyRetrieverRunner thread"); + Thread t = new Thread( + new KeyRetrieverRunner(this), + "KeyRetrieverRunner-" + authorityID); + t.start(); + keyRetrieverThreads.put(authorityID, t); + } else { + CMS.debug("KeyRetriever thread already running for authority " + authorityID); + } } return false; } @@ -3180,6 +3187,15 @@ public class CertificateAuthority } public void run() { + try { + _run(); + } finally { + // remove self from tracker + keyRetrieverThreads.remove(ca.authorityID); + } + } + + private void _run() { String KR_CLASS_KEY = "features.authority.keyRetrieverClass"; String className = null; try { -- 2.5.5
From f6eed44501b6b65f1da1e32c9c6755db180b8776 Mon Sep 17 00:00:00 2001 From: Fraser Tweedale <[email protected]> Date: Tue, 31 May 2016 22:20:06 +1000 Subject: [PATCH 118/119] Don't update obsolete CertificateAuthority after key retrieval If additional LDAP events are processed for a lightweight CA while key retrieval proceeds in another thread, when retrieval is complete, the KeyRetrieverRunner reinitialises the signing unit of a stale object. Instead of holding onto a CertificateAuthority, hold onto the AuthorityID and look it up afresh when ready to reinitialise its SigningUnit. Part of: https://fedorahosted.org/pki/ticket/2293 --- .../src/com/netscape/ca/CertificateAuthority.java | 31 +++++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/base/ca/src/com/netscape/ca/CertificateAuthority.java b/base/ca/src/com/netscape/ca/CertificateAuthority.java index c2a7d0c907b4dd5774b22cfbb404194da162a535..289ab7ac703fcd7e35b11b589f0dfb2b57488006 100644 --- a/base/ca/src/com/netscape/ca/CertificateAuthority.java +++ b/base/ca/src/com/netscape/ca/CertificateAuthority.java @@ -1496,7 +1496,7 @@ public class CertificateAuthority if (!keyRetrieverThreads.containsKey(authorityID)) { CMS.debug("Starting KeyRetrieverRunner thread"); Thread t = new Thread( - new KeyRetrieverRunner(this), + new KeyRetrieverRunner(authorityID, mNickname, authorityKeyHosts), "KeyRetrieverRunner-" + authorityID); t.start(); keyRetrieverThreads.put(authorityID, t); @@ -3180,10 +3180,15 @@ public class CertificateAuthority } private class KeyRetrieverRunner implements Runnable { - private CertificateAuthority ca; + private AuthorityID aid; + private String nickname; + private Collection<String> hosts; - public KeyRetrieverRunner(CertificateAuthority ca) { - this.ca = ca; + public KeyRetrieverRunner( + AuthorityID aid, String nickname, Collection<String> hosts) { + this.aid = aid; + this.nickname = nickname; + this.hosts = hosts; } public void run() { @@ -3191,7 +3196,7 @@ public class CertificateAuthority _run(); } finally { // remove self from tracker - keyRetrieverThreads.remove(ca.authorityID); + keyRetrieverThreads.remove(aid); } } @@ -3226,7 +3231,7 @@ public class CertificateAuthority KeyRetriever.Result krr = null; try { - krr = kr.retrieveKey(ca.mNickname, ca.authorityKeyHosts); + krr = kr.retrieveKey(nickname, hosts); } catch (Throwable e) { CMS.debug("Caught exception during execution of KeyRetriever.retrieveKey"); CMS.debug(e); @@ -3254,16 +3259,28 @@ public class CertificateAuthority CryptoUtil.importPKIArchiveOptions( token, unwrappingKey, pubkey, paoData); - cert = manager.importUserCACertPackage(certBytes, ca.mNickname); + cert = manager.importUserCACertPackage(certBytes, nickname); } catch (Throwable e) { CMS.debug("Caught exception during cert/key import"); CMS.debug(e); return; } + CertificateAuthority ca; boolean initSigUnitSucceeded = false; try { CMS.debug("Reinitialising SigningUnit"); + + /* While we were retrieving the key and cert, the + * CertificateAuthority instance in the caMap might + * have been replaced, so look it up afresh. + */ + ca = (CertificateAuthority) getCA(aid); + if (ca == null) { + CMS.debug("Authority is no longer in caMap; returning."); + return; + } + // re-init signing unit, but avoid triggering // key replication if initialisation fails again // for some reason -- 2.5.5
From de46a100698c77168467d72b16823de2630dda55 Mon Sep 17 00:00:00 2001 From: Fraser Tweedale <[email protected]> Date: Wed, 1 Jun 2016 09:46:56 +1000 Subject: [PATCH 119/119] Retry failed key retrieval with backoff If lightweight CA key retrieval fails, retry the retieval after a delay of 10 seconds initially, increasing thereafter. Fixes: https://fedorahosted.org/pki/ticket/2293 --- .../src/com/netscape/ca/CertificateAuthority.java | 58 ++++++++++++++++------ 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/base/ca/src/com/netscape/ca/CertificateAuthority.java b/base/ca/src/com/netscape/ca/CertificateAuthority.java index 289ab7ac703fcd7e35b11b589f0dfb2b57488006..1505b9951cffae2cae8dcc715bb60fa56b78cae8 100644 --- a/base/ca/src/com/netscape/ca/CertificateAuthority.java +++ b/base/ca/src/com/netscape/ca/CertificateAuthority.java @@ -3193,21 +3193,39 @@ public class CertificateAuthority public void run() { try { - _run(); + long d = 10000; // initial delay of 10 seconds + while (!_run()) { + CMS.debug("Retrying in " + d / 1000 + " seconds"); + try { + Thread.sleep(d); + } catch (InterruptedException e) { + break; + } + d += d / 2; // back off + } } finally { // remove self from tracker keyRetrieverThreads.remove(aid); } } - private void _run() { + /** + * Main routine of key retrieval and key import. + * + * @return false if retrieval should be retried, or true if + * the process is "done". Note that a result of true + * does not necessarily imply that the process fully + * completed. See comments at sites of 'return true;' + * below. + */ + private boolean _run() { String KR_CLASS_KEY = "features.authority.keyRetrieverClass"; String className = null; try { className = CMS.getConfigStore().getString(KR_CLASS_KEY); } catch (EBaseException e) { CMS.debug("Unable to read key retriever class from CS.cfg: " + e); - return; + return false; } KeyRetriever kr = null; @@ -3218,15 +3236,15 @@ public class CertificateAuthority } catch (ClassNotFoundException e) { CMS.debug("Could not find class: " + className); CMS.debug(e); - return; + return false; } catch (ClassCastException e) { CMS.debug("Class is not an instance of KeyRetriever: " + className); CMS.debug(e); - return; + return false; } catch (InstantiationException | IllegalAccessException e) { CMS.debug("Could not instantiate class: " + className); CMS.debug(e); - return; + return false; } KeyRetriever.Result krr = null; @@ -3235,12 +3253,12 @@ public class CertificateAuthority } catch (Throwable e) { CMS.debug("Caught exception during execution of KeyRetriever.retrieveKey"); CMS.debug(e); - return; + return false; } if (krr == null) { CMS.debug("KeyRetriever did not return a result."); - return; + return false; } CMS.debug("Importing key and cert"); @@ -3263,7 +3281,7 @@ public class CertificateAuthority } catch (Throwable e) { CMS.debug("Caught exception during cert/key import"); CMS.debug(e); - return; + return false; } CertificateAuthority ca; @@ -3277,8 +3295,11 @@ public class CertificateAuthority */ ca = (CertificateAuthority) getCA(aid); if (ca == null) { - CMS.debug("Authority is no longer in caMap; returning."); - return; + /* We got the key, but the authority has been + * deleted. Do not retry. + */ + CMS.debug("Authority was deleted; returning."); + return true; } // re-init signing unit, but avoid triggering @@ -3289,22 +3310,31 @@ public class CertificateAuthority } catch (Throwable e) { CMS.debug("Caught exception during SigningUnit re-init"); CMS.debug(e); - return; + return false; } if (!initSigUnitSucceeded) { CMS.debug("Failed to re-init SigningUnit"); - return; + return false; } CMS.debug("Adding self to authorityKeyHosts attribute"); try { ca.addInstanceToAuthorityKeyHosts(); } catch (Throwable e) { + /* We retrieved key, imported it, and successfully + * re-inited the signing unit. The only thing that + * failed was adding this host to the list of hosts + * that possess the key. This is unlikely, and the + * key is available elsewhere, so no need to retry. + */ CMS.debug("Failed to add self to authorityKeyHosts"); CMS.debug(e); - return; + return true; } + + /* All good! */ + return true; } } -- 2.5.5
_______________________________________________ Pki-devel mailing list [email protected] https://www.redhat.com/mailman/listinfo/pki-devel
