rhtyd closed pull request #2436: Enable restart of redundant VPCs implementing 
Rolling Restart
URL: https://github.com/apache/cloudstack/pull/2436
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/server/src/com/cloud/network/NetworkServiceImpl.java 
b/server/src/com/cloud/network/NetworkServiceImpl.java
index 93f73d20a5f..49e86892f7c 100644
--- a/server/src/com/cloud/network/NetworkServiceImpl.java
+++ b/server/src/com/cloud/network/NetworkServiceImpl.java
@@ -1861,15 +1861,6 @@ public boolean restartNetwork(RestartNetworkCmd cmd, 
boolean cleanup) throws Con
                     + Network.State.Setup);
         }
 
-        if (network.getBroadcastDomainType() == BroadcastDomainType.Lswitch) {
-            /**
-             * Unable to restart these networks now.
-             * TODO Restarting a SDN based network requires updating the nics 
and the configuration
-             * in the controller. This requires a non-trivial rewrite of the 
restart procedure.
-             */
-            throw new InvalidParameterException("Unable to restart a running 
SDN network.");
-        }
-
         _accountMgr.checkAccess(callerAccount, null, true, network);
 
         boolean success = _networkMgr.restartNetwork(networkId, callerAccount, 
callerUser, cleanup);
diff --git a/server/src/com/cloud/network/vpc/VpcManagerImpl.java 
b/server/src/com/cloud/network/vpc/VpcManagerImpl.java
index ab6441ac5b7..be9ad5a0ba8 100644
--- a/server/src/com/cloud/network/vpc/VpcManagerImpl.java
+++ b/server/src/com/cloud/network/vpc/VpcManagerImpl.java
@@ -37,6 +37,7 @@
 import javax.inject.Inject;
 import javax.naming.ConfigurationException;
 
+import com.cloud.vm.dao.DomainRouterDao;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.log4j.Logger;
 
@@ -88,6 +89,8 @@
 import com.cloud.network.element.NetworkElement;
 import com.cloud.network.element.StaticNatServiceProvider;
 import com.cloud.network.element.VpcProvider;
+import com.cloud.network.router.VirtualRouter;
+import com.cloud.network.router.VpcVirtualNetworkApplianceManager;
 import com.cloud.network.vpc.VpcOffering.State;
 import com.cloud.network.vpc.dao.NetworkACLDao;
 import com.cloud.network.vpc.dao.PrivateIpDao;
@@ -132,8 +135,10 @@
 import com.cloud.utils.exception.CloudRuntimeException;
 import com.cloud.utils.exception.ExceptionUtil;
 import com.cloud.utils.net.NetUtils;
+import com.cloud.vm.DomainRouterVO;
 import com.cloud.vm.ReservationContext;
 import com.cloud.vm.ReservationContextImpl;
+import com.cloud.vm.VirtualMachine;
 
 public class VpcManagerImpl extends ManagerBase implements VpcManager, 
VpcProvisioningService, VpcService {
     private static final Logger s_logger = 
Logger.getLogger(VpcManagerImpl.class);
@@ -196,6 +201,10 @@
     NetworkACLManager _networkAclMgr;
     @Inject
     IpAddressManager _ipAddrMgr;
+    @Inject
+    VpcVirtualNetworkApplianceManager _routerMgr;
+    @Inject
+    DomainRouterDao _routerDao;
 
     @Inject
     private VpcPrivateGatewayTransactionCallable vpcTxCallable;
@@ -1482,7 +1491,9 @@ public boolean cleanupVpcResources(final long vpcId, 
final Account caller, final
     public boolean restartVpc(final long vpcId, final boolean cleanUp, final 
boolean makeRedundant) throws ConcurrentOperationException, 
ResourceUnavailableException,
     InsufficientCapacityException {
 
-        final Account caller = CallContext.current().getCallingAccount();
+        final Account callerAccount = 
CallContext.current().getCallingAccount();
+        final User callerUser = 
_accountMgr.getActiveUser(CallContext.current().getCallingUserId());
+        final ReservationContext context = new ReservationContextImpl(null, 
null, callerUser, callerAccount);
 
         // Verify input parameters
         final Vpc vpc = getActiveVpc(vpcId);
@@ -1492,7 +1503,7 @@ public boolean restartVpc(final long vpcId, final boolean 
cleanUp, final boolean
             throw ex;
         }
 
-        _accountMgr.checkAccess(caller, null, false, vpc);
+        _accountMgr.checkAccess(callerAccount, null, false, vpc);
 
         s_logger.debug("Restarting VPC " + vpc);
         boolean restartRequired = false;
@@ -1516,11 +1527,25 @@ public boolean restartVpc(final long vpcId, final 
boolean cleanUp, final boolean
             }
 
             if (forceCleanup) {
-                s_logger.debug("Shutting down VPC " + vpc + " as a part of VPC 
restart process");
-                if (!shutdownVpc(vpcId)) {
-                    s_logger.warn("Failed to shutdown vpc as a part of VPC " + 
vpc + " restart process");
-                    restartRequired = true;
-                    return false;
+                List<DomainRouterVO> routers = 
_routerDao.listByVpcId(vpc.getId());
+                if (routers != null && !routers.isEmpty()) {
+                    s_logger.debug("Shutting down VPC " + vpc + " as a part of 
VPC restart process");
+                    // Get rid of any non-Running routers
+                    for (final DomainRouterVO router : routers) {
+                        if (router.getState() != VirtualMachine.State.Running) 
{
+                            s_logger.debug("Destroying " + router + " as it is 
not in Running state anyway");
+                            _routerMgr.destroyRouter(router.getId(), 
context.getAccount(), context.getCaller().getId());
+                        }
+                    }
+                    // Refresh the list of routers
+                    routers = _routerDao.listByVpcId(vpc.getId());
+                    if (routers != null && !routers.isEmpty()) {
+                        if (!rollingRestartVpc(vpc, routers, context)) {
+                            s_logger.warn("Failed to execute a rolling restart 
as a part of VPC " + vpc + " restart process");
+                            restartRequired = true;
+                            return false;
+                        }
+                    }
                 }
             } else {
                 s_logger.info("Will not shutdown vpc as a part of VPC " + vpc 
+ " restart process.");
@@ -2435,4 +2460,83 @@ public boolean isSrcNatIpRequired(long vpcOfferingId) {
         final Map<Network.Service, Set<Network.Provider>> 
vpcOffSvcProvidersMap = getVpcOffSvcProvidersMap(vpcOfferingId);
         return 
vpcOffSvcProvidersMap.get(Network.Service.SourceNat).contains(Network.Provider.VPCVirtualRouter);
     }
+
+    /**
+     * Replaces the routers of a VPC one after the other instead of shutting the whole VPC down.
+     *
+     * With two routers, the one reported as BACKUP (the "secondary") is replaced first and the
+     * one reported as MASTER (the "main") last. With a single router, that router is the main one.
+     * For a redundant VPC a replacement step is always executed for the secondary slot, even when
+     * no secondary router exists yet, so that a new router is started before the main one is destroyed.
+     *
+     * @param vpc     the VPC being restarted
+     * @param routers the routers currently listed for the VPC; may be {@code null}
+     * @param context reservation context carrying the calling account and user
+     * @return {@code true} when every replacement step succeeded, {@code false} as soon as one step fails
+     */
+    private boolean rollingRestartVpc(Vpc vpc, List<DomainRouterVO> routers, ReservationContext context) throws ResourceUnavailableException, ConcurrentOperationException, InsufficientCapacityException {
+        final int sleepTimeInMsAfterRouterStart = 10000;
+        final int numberOfRoutersWhenSingle = 1;
+        final int numberOfRoutersWhenRedundant = 2;
+        final int numberOfRouters = routers == null ? 0 : routers.size();
+
+        // Decide which router is replaced last (main) and which one first (secondary)
+        DomainRouterVO mainRouter = null;
+        DomainRouterVO secondaryRouter = null;
+        if (numberOfRouters == numberOfRoutersWhenSingle) {
+            mainRouter = routers.get(0);
+            s_logger.debug("Rolling restart found a single router " + mainRouter.getInstanceName() + " as part of rolling restart of VPC " + vpc);
+        } else if (numberOfRouters == numberOfRoutersWhenRedundant) {
+            DomainRouterVO router1 = routers.get(0);
+            DomainRouterVO router2 = routers.get(1);
+            if (router1.getRedundantState() == VirtualRouter.RedundantState.MASTER || router2.getRedundantState() == VirtualRouter.RedundantState.BACKUP) {
+                mainRouter = router1;
+                secondaryRouter = router2;
+            } else if (router1.getRedundantState() == VirtualRouter.RedundantState.BACKUP || router2.getRedundantState() == VirtualRouter.RedundantState.MASTER) {
+                mainRouter = router2;
+                secondaryRouter = router1;
+            } else {
+                // both routers are in UNKNOWN state or in the same state. Order doesn't matter.
+                mainRouter = router1;
+                secondaryRouter = router2;
+            }
+            s_logger.debug("Rolling restart of VPC " + vpc + " will first replace router " + secondaryRouter.getInstanceName() + " and then router " + mainRouter.getInstanceName());
+        } else {
+            // Neither the single nor the redundant layout: no existing router is selected for destruction
+            s_logger.warn("Rolling restart of VPC " + vpc + " found an unexpected number of routers (" + numberOfRouters + "); no existing router will be destroyed");
+        }
+
+        DeployDestination dest = new DeployDestination(_dcDao.findById(vpc.getZoneId()), null, null, null);
+
+        // If we are supposed to be redundant, let's replace the secondary router first.
+        // We do this even when secondaryRouter is null, so we first spin a new router before replacing the main router
+        if (vpc.isRedundant()) {
+            if (!replaceRouter(vpc, context, sleepTimeInMsAfterRouterStart, secondaryRouter, dest)) {
+                s_logger.debug("Recreating the secondary router for VPC " + vpc + " failed.");
+                return false;
+            }
+        }
+
+        // Replace the main router (the only router of a single-router VPC, or the second of a redundant pair)
+        if (mainRouter != null) {
+            if (!replaceRouter(vpc, context, sleepTimeInMsAfterRouterStart, mainRouter, dest)) {
+                s_logger.debug("Recreating the main router for VPC " + vpc + " failed.");
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    private boolean replaceRouter(final Vpc vpc, final ReservationContext 
context, final int sleepTimeInMsAfterRouterStart, final DomainRouterVO 
routerToReplace, final DeployDestination dest) throws 
ResourceUnavailableException, InsufficientCapacityException {
+        if (routerToReplace != null) {
+            s_logger.debug("Destroying router " + 
routerToReplace.getInstanceName() + " as part of rolling restart of VPC " + 
vpc);
+            _routerMgr.destroyRouter(routerToReplace.getId(), 
context.getAccount(), context.getCaller().getId());
+        }
+        s_logger.debug("Triggering new router create as part of rolling 
restart of VPC " + vpc);
+        startVpc(vpc, dest, context);
+        try {
+            // wait for the keepalived/conntrackd on router
+            Thread.sleep(sleepTimeInMsAfterRouterStart);
+        } catch (InterruptedException e) {
+            s_logger.trace("Ignoring InterruptedException.", e);
+        }
+
+        // Routers after this action
+        List<DomainRouterVO> routers = _routerDao.listByVpcId(vpc.getId());
+        for (final DomainRouterVO router : routers) {
+            // Both should be in state Running, or else the provisioning went 
wrong somehow as we started with destroying non-Running routers
+            // In order not to kill both routers, we'll stop the procedure.
+            if (router.getState() != VirtualMachine.State.Running) {
+                s_logger.debug("Found router " + router.getInstanceName() + " 
part of VPC " + vpc + " to be in non-Running state " + router.getState() + ", 
so not proceeding with" +
+                        "next router to prevent downtime. Please try again.");
+                return false;
+            }
+        }
+        return true;
+    }
+
 }


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to