[
https://issues.apache.org/jira/browse/CLOUDSTACK-9114?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16411332#comment-16411332
]
ASF GitHub Bot commented on CLOUDSTACK-9114:
--------------------------------------------
rhtyd closed pull request #2435: CLOUDSTACK-9114: restartnetwork with cleanup
should restart the RVRs …
URL: https://github.com/apache/cloudstack/pull/2435
This is a PR merged from a forked repository.
Because GitHub hides the original diff of a foreign pull request (one opened
from a fork) once it has been merged, the diff is reproduced below for the
sake of provenance:
diff --git
a/engine/orchestration/src/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java
b/engine/orchestration/src/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java
index cec2e5926c1..e324db7cf8c 100644
---
a/engine/orchestration/src/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java
+++
b/engine/orchestration/src/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java
@@ -38,6 +38,7 @@
import javax.inject.Inject;
import javax.naming.ConfigurationException;
+import com.cloud.network.VpcVirtualNetworkApplianceService;
import org.apache.log4j.Logger;
import org.apache.cloudstack.acl.ControlledEntity.ACLType;
@@ -45,7 +46,6 @@
import org.apache.cloudstack.engine.cloud.entity.api.db.VMNetworkMapVO;
import org.apache.cloudstack.engine.cloud.entity.api.db.dao.VMNetworkMapDao;
import
org.apache.cloudstack.engine.orchestration.service.NetworkOrchestrationService;
-import org.apache.cloudstack.framework.config.ConfigDepot;
import org.apache.cloudstack.framework.config.ConfigKey;
import org.apache.cloudstack.framework.config.ConfigKey.Scope;
import org.apache.cloudstack.framework.config.Configurable;
@@ -53,7 +53,6 @@
import org.apache.cloudstack.framework.messagebus.MessageBus;
import org.apache.cloudstack.framework.messagebus.PublishScope;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
-import org.apache.cloudstack.region.PortableIpDao;
import com.cloud.agent.AgentManager;
@@ -86,7 +85,6 @@
import com.cloud.deploy.DeployDestination;
import com.cloud.deploy.DeploymentPlan;
import com.cloud.domain.Domain;
-import com.cloud.event.dao.UsageEventDao;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.ConnectionException;
import com.cloud.exception.InsufficientAddressCapacityException;
@@ -127,7 +125,6 @@
import com.cloud.network.dao.NetworkAccountDao;
import com.cloud.network.dao.NetworkAccountVO;
import com.cloud.network.dao.NetworkDao;
-import com.cloud.network.dao.NetworkDetailsDao;
import com.cloud.network.dao.NetworkDomainDao;
import com.cloud.network.dao.NetworkDomainVO;
import com.cloud.network.dao.NetworkServiceMapDao;
@@ -140,7 +137,6 @@
import com.cloud.network.dao.PhysicalNetworkVO;
import com.cloud.network.dao.RemoteAccessVpnDao;
import com.cloud.network.dao.RemoteAccessVpnVO;
-import com.cloud.network.dao.VpnUserDao;
import com.cloud.network.element.AggregatedCommandExecutor;
import com.cloud.network.element.DhcpServiceProvider;
import com.cloud.network.element.DnsServiceProvider;
@@ -286,11 +282,11 @@
@Inject
VMNetworkMapDao _vmNetworkMapDao;
@Inject
- DomainRouterDao _rotuerDao;
+ DomainRouterDao _routerDao;
@Inject
RemoteAccessVpnDao _remoteAccessVpnDao;
@Inject
- VpnUserDao _vpnUserDao;
+ VpcVirtualNetworkApplianceService _routerService;
List<NetworkGuru> networkGurus;
@@ -368,17 +364,9 @@ public void setDhcpProviders(final
List<DhcpServiceProvider> dhcpProviders) {
@Inject
NetworkACLManager _networkACLMgr;
@Inject
- UsageEventDao _usageEventDao;
- @Inject
NetworkModel _networkModel;
@Inject
NicSecondaryIpDao _nicSecondaryIpDao;
- @Inject
- PortableIpDao _portableIpDao;
- @Inject
- ConfigDepot _configDepot;
- @Inject
- NetworkDetailsDao _networkDetailsDao;
protected StateMachine2<Network.State, Network.Event, Network>
_stateMachine;
ScheduledExecutorService _executor;
@@ -1145,30 +1133,8 @@ public void implementNetworkElementsAndResources(final
DeployDestination dest, f
}
}
// get providers to implement
- final List<Provider> providersToImplement =
getNetworkProviders(network.getId());
- for (final NetworkElement element : networkElements) {
- if (providersToImplement.contains(element.getProvider())) {
- if
(!_networkModel.isProviderEnabledInPhysicalNetwork(_networkModel.getPhysicalNetworkId(network),
element.getProvider().getName())) {
- // The physicalNetworkId will not get translated into a
uuid by the reponse serializer,
- // because the serializer would look up the NetworkVO
class's table and retrieve the
- // network id instead of the physical network id.
- // So just throw this exception as is. We may need to TBD
by changing the serializer.
- throw new CloudRuntimeException("Service provider " +
element.getProvider().getName() + " either doesn't exist or is not enabled in
physical network id: "
- + network.getPhysicalNetworkId());
- }
-
- if (s_logger.isDebugEnabled()) {
- s_logger.debug("Asking " + element.getName() + " to
implement " + network);
- }
-
- if (!element.implement(network, offering, dest, context)) {
- final CloudRuntimeException ex = new
CloudRuntimeException("Failed to implement provider " +
element.getProvider().getName() + " for network with specified id");
- ex.addProxyObject(network.getUuid(), "networkId");
- throw ex;
- }
- }
- }
-
+ List<Provider> providersToImplement =
getNetworkProviders(network.getId());
+ implementNetworkElements(dest, context, network, offering,
providersToImplement);
//Reset the extra DHCP option that may have been cleared per nic.
List<NicVO> nicVOs = _nicDao.listByNetworkId(network.getId());
@@ -1216,6 +1182,31 @@ public void implementNetworkElementsAndResources(final
DeployDestination dest, f
}
}
+ protected void implementNetworkElements(DeployDestination dest,
ReservationContext context, Network network, NetworkOffering offering,
List<Provider> providersToImplement)
+ throws ConcurrentOperationException, ResourceUnavailableException,
InsufficientCapacityException {
+ for (NetworkElement element : networkElements) {
+ if (providersToImplement.contains(element.getProvider())) {
+ if
(!_networkModel.isProviderEnabledInPhysicalNetwork(_networkModel.getPhysicalNetworkId(network),
element.getProvider().getName())) {
+ // The physicalNetworkId will not get translated into a
uuid by the reponse serializer,
+ // because the serializer would look up the NetworkVO
class's table and retrieve the
+ // network id instead of the physical network id.
+ // So just throw this exception as is. We may need to TBD
by changing the serializer.
+ throw new CloudRuntimeException("Service provider " +
element.getProvider().getName() + " either doesn't exist or is not enabled in
physical network id: "
+ + network.getPhysicalNetworkId());
+ }
+
+ if (s_logger.isDebugEnabled()) {
+ s_logger.debug("Asking " + element.getName() + " to
implemenet " + network);
+ }
+
+ if (!element.implement(network, offering, dest, context)) {
+ CloudRuntimeException ex = new
CloudRuntimeException("Failed to implement provider " +
element.getProvider().getName() + " for network with specified id");
+ ex.addProxyObject(network.getUuid(), "networkId");
+ throw ex;
+ }
+ }
+ }
+ }
// This method re-programs the rules/ips for existing network
protected boolean reprogramNetworkRules(final long networkId, final
Account caller, final Network network) throws ResourceUnavailableException {
boolean success = true;
@@ -1348,7 +1339,7 @@ public boolean canUpdateInSequence(Network network,
boolean forced){
throw new UnsupportedOperationException("Cannot update the
network resources in sequence when providers other than virtualrouter are
used");
}
//check if routers are in correct state before proceeding with the
update
- List<DomainRouterVO>
routers=_rotuerDao.listByNetworkAndRole(network.getId(),
VirtualRouter.Role.VIRTUAL_ROUTER);
+ List<DomainRouterVO> routers=
_routerDao.listByNetworkAndRole(network.getId(),
VirtualRouter.Role.VIRTUAL_ROUTER);
for(DomainRouterVO router :routers){
if(router.getRedundantState()==
VirtualRouter.RedundantState.UNKNOWN){
if(!forced){
@@ -2840,6 +2831,11 @@ public boolean restartNetwork(final Long networkId,
final Account callerAccount,
final ReservationContext context = new ReservationContextImpl(null,
null, callerUser, callerAccount);
if (cleanup) {
+ if
(_networkOfferingDao.findByIdIncludingRemoved(network.getNetworkOfferingId()).getRedundantRouter())
{
+ List<DomainRouterVO> routers =
_routerDao.findByNetwork(network.getId());
+ if (routers != null && !routers.isEmpty())
+ return restartGuestNetworkWithRedundantRouters(network,
routers, context);
+ }
// shutdown the network
s_logger.debug("Shutting down the network id=" + networkId + " as
a part of network restart");
@@ -2868,6 +2864,62 @@ public boolean restartNetwork(final Long networkId,
final Account callerAccount,
}
}
+ /* If there are redundant routers in the isolated network, we follow the
steps to make the network working better
+ * (1) destroy backup router if exists
+ * (2) create new backup router
+ * (3) destroy master router (then the backup will become master)
+ * (4) create a new router as backup router.
+ */
+ private boolean restartGuestNetworkWithRedundantRouters(NetworkVO network,
List<DomainRouterVO> routers, ReservationContext context) throws
ResourceUnavailableException, ConcurrentOperationException,
InsufficientCapacityException {
+ Account caller = CallContext.current().getCallingAccount();
+ long callerUserId = CallContext.current().getCallingUserId();
+
+ // check the master and backup redundant state
+ DomainRouterVO masterRouter = null;
+ DomainRouterVO backupRouter = null;
+ if (routers != null && routers.size() == 1) {
+ masterRouter = routers.get(0);
+ } if (routers != null && routers.size() == 2) {
+ DomainRouterVO router1 = routers.get(0);
+ DomainRouterVO router2 = routers.get(1);
+ if (router1.getRedundantState() ==
VirtualRouter.RedundantState.MASTER || router2.getRedundantState() ==
VirtualRouter.RedundantState.BACKUP) {
+ masterRouter = router1;
+ backupRouter = router2;
+ } else if (router1.getRedundantState() ==
VirtualRouter.RedundantState.BACKUP || router2.getRedundantState() ==
VirtualRouter.RedundantState.MASTER) {
+ masterRouter = router2;
+ backupRouter = router1;
+ } else { // both routers are in UNKNOWN state
+ masterRouter = router1;
+ backupRouter = router2;
+ }
+ }
+
+ NetworkOfferingVO offering =
_networkOfferingDao.findByIdIncludingRemoved(network.getNetworkOfferingId());
+ DeployDestination dest = new
DeployDestination(_dcDao.findById(network.getDataCenterId()), null, null, null);
+ List<Provider> providersToImplement =
getNetworkProviders(network.getId());
+
+ // destroy backup router
+ if (backupRouter != null) {
+ _routerService.destroyRouter(backupRouter.getId(), caller,
callerUserId);
+ }
+ // create new backup router
+ implementNetworkElements(dest, context, network, offering,
providersToImplement);
+
+ // destroy master router
+ if (masterRouter != null) {
+ try {
+ Thread.sleep(10000); // wait 10s for the keepalived/conntrackd
on backup router
+ } catch (InterruptedException e) {
+ s_logger.trace("Ignoring InterruptedException.", e);
+ }
+ _routerService.destroyRouter(masterRouter.getId(), caller,
callerUserId);
+ // create a new router
+ implementNetworkElements(dest, context, network, offering,
providersToImplement);
+ }
+
+ return true;
+ }
+
private void setRestartRequired(final NetworkVO network, final boolean
restartRequired) {
s_logger.debug("Marking network " + network + " with restartRequired="
+ restartRequired);
network.setRestartRequired(restartRequired);
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> restartnetwork with cleanup should not update/restart both routers at once
> --------------------------------------------------------------------------
>
> Key: CLOUDSTACK-9114
> URL: https://issues.apache.org/jira/browse/CLOUDSTACK-9114
> Project: CloudStack
> Issue Type: Improvement
> Security Level: Public (Anyone can view this level - this is the
> default.)
> Reporter: Wei Zhou
> Assignee: Wei Zhou
> Priority: Major
>
> Currently, restartNetwork with cleanup first stops both RVRs (redundant
> virtual routers) and then starts two new RVRs.
> To reduce network downtime, the RVRs should be restarted one at a time.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)