Mike Kolesnik has uploaded a new change for review. Change subject: engine: Auto recovery ignores hosts with problematic NICs ......................................................................
engine: Auto recovery ignores hosts with problematic NICs Such hosts cannot be automatically recovered since the NICs are down and the host will just go back to non-operational state. Change-Id: I70bff4ba5541156c31fb8afdd38ff6f706c0d869 Bug-Url: https://bugzilla.redhat.com/953115 Signed-off-by: Mike Kolesnik <[email protected]> --- M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/AutoRecoveryManager.java M backend/manager/modules/bll/src/test/java/org/ovirt/engine/core/bll/AutoRecoveryManagerTest.java M backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java 3 files changed, 124 insertions(+), 50 deletions(-) git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/47/14047/1 diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/AutoRecoveryManager.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/AutoRecoveryManager.java index 9fc4247..3a41f0c 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/AutoRecoveryManager.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/AutoRecoveryManager.java @@ -1,5 +1,6 @@ package org.ovirt.engine.core.bll; +import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -9,10 +10,11 @@ import org.ovirt.engine.core.common.action.VdcActionType; import org.ovirt.engine.core.common.action.VdsActionParameters; import org.ovirt.engine.core.common.businessentities.BusinessEntity; -import org.ovirt.engine.core.common.businessentities.VDS; import org.ovirt.engine.core.common.businessentities.StorageDomain; +import org.ovirt.engine.core.common.businessentities.VDS; import org.ovirt.engine.core.common.config.Config; import org.ovirt.engine.core.common.config.ConfigValues; +import org.ovirt.engine.core.common.utils.Pair; import org.ovirt.engine.core.compat.Guid; import org.ovirt.engine.core.dal.dbbroker.DbFacade; import org.ovirt.engine.core.dao.AutoRecoverDAO; @@ -20,6 +22,7 @@ import org.ovirt.engine.core.utils.log.LogFactory; import org.ovirt.engine.core.utils.timer.OnTimerMethodAnnotation; import org.ovirt.engine.core.utils.timer.SchedulerUtilQuartzImpl; +import org.ovirt.engine.core.vdsbroker.VdsUpdateRunTimeInfo; /** * Runs scheduled autorecovery jobs. @@ -62,6 +65,22 @@ params.setRunSilent(true); return params; } + }, new FilterClosure<VDS>() { + @Override + public List<VDS> filter(List<VDS> list) { + List<VDS> filtered = new ArrayList<>(list.size()); + + for (VDS vds : list) { + Pair<List<String>, List<String>> problematicNics = + VdsUpdateRunTimeInfo.determineProblematicNics(getDbFacade().getInterfaceDao() + .getAllInterfacesForVds(vds.getId()), getDbFacade().getNetworkDao() + .getAllForCluster(vds.getVdsGroupId())); + if (problematicNics.getFirst().isEmpty()) { + filtered.add(vds); + } + } + return filtered; + } }, "hosts"); check(dbFacade.getStorageDomainDao(), VdcActionType.ConnectDomainToStorage, @@ -73,6 +92,11 @@ params.setRunSilent(true); return params; } + }, new FilterClosure<StorageDomain>() { + @Override + public List<StorageDomain> filter(List<StorageDomain> list) { + return list; + } }, "storage domains"); } @@ -81,18 +105,20 @@ * @param dao the dao to get the list of failing resources from * @param actionType autorecovery action * @param paramsCallback a closure to create the parameters for the autorecovery action + * @param filter a filter to select the recoverable resources * @param logMsg a user-readable name for the failing resource type */ <T extends BusinessEntity<Guid>> void check(final AutoRecoverDAO<T> dao, final VdcActionType actionType, final DoWithClosure<T, VdcActionParametersBase> paramsCallback, + final FilterClosure<T> filter, final String logMsg) { if (!shouldPerformRecoveryOnType(logMsg)) { log.info("Autorecovering " + logMsg + " is disabled, skipping"); return; } log.debugFormat("Checking autorecoverable {0}" , logMsg); - final List<T> fails = dao.listFailedAutorecoverables(); + final List<T> fails = filter.filter(dao.listFailedAutorecoverables()); if (fails.size() > 0) { final BackendInternal backend = getBackend(); log.info("Autorecovering " + fails.size() + " " + logMsg); @@ -125,6 +151,10 @@ R doWith(T arg); } + private interface FilterClosure<T> { + List<T> filter(List<T> list); + } + private static boolean shouldPerformRecoveryOnType(String type) { Map<String, String> allowedRecoveryTypesFromConfig = Config.<Map<String, String>> GetValue(ConfigValues.AutoRecoveryAllowedTypes); diff --git a/backend/manager/modules/bll/src/test/java/org/ovirt/engine/core/bll/AutoRecoveryManagerTest.java b/backend/manager/modules/bll/src/test/java/org/ovirt/engine/core/bll/AutoRecoveryManagerTest.java index b2b619a..998d416 100644 --- a/backend/manager/modules/bll/src/test/java/org/ovirt/engine/core/bll/AutoRecoveryManagerTest.java +++ b/backend/manager/modules/bll/src/test/java/org/ovirt/engine/core/bll/AutoRecoveryManagerTest.java @@ -12,6 +12,7 @@ import static org.ovirt.engine.core.utils.MockConfigRule.mockConfig; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -25,14 +26,18 @@ import org.ovirt.engine.core.bll.interfaces.BackendInternal; import org.ovirt.engine.core.common.action.VdcActionParametersBase; import org.ovirt.engine.core.common.action.VdcActionType; -import org.ovirt.engine.core.common.businessentities.VDS; import org.ovirt.engine.core.common.businessentities.StorageDomain; +import org.ovirt.engine.core.common.businessentities.VDS; +import org.ovirt.engine.core.common.businessentities.network.Network; +import org.ovirt.engine.core.common.businessentities.network.VdsNetworkInterface; import org.ovirt.engine.core.common.config.Config; import org.ovirt.engine.core.common.config.ConfigValues; import org.ovirt.engine.core.compat.Guid; import org.ovirt.engine.core.dal.dbbroker.DbFacade; import org.ovirt.engine.core.dao.StorageDomainDAO; import org.ovirt.engine.core.dao.VdsDAO; +import org.ovirt.engine.core.dao.network.InterfaceDao; +import org.ovirt.engine.core.dao.network.NetworkDao; import org.ovirt.engine.core.utils.MockConfigRule; @RunWith(MockitoJUnitRunner.class) @@ -69,6 +74,15 @@ final StorageDomainDAO storageDomainDaoMock = mock(StorageDomainDAO.class); when(storageDomainDaoMock.listFailedAutorecoverables()).thenReturn(storageDomains); when(dbFacadeMock.getStorageDomainDao()).thenReturn(storageDomainDaoMock); + + final InterfaceDao interfaceDaoMock = mock(InterfaceDao.class); + doReturn(interfaceDaoMock).when(dbFacadeMock).getInterfaceDao(); + when(interfaceDaoMock.getAllInterfacesForVds(any(Guid.class))) + .thenReturn(Collections.<VdsNetworkInterface> emptyList()); + + final NetworkDao networkDaoMock = mock(NetworkDao.class); + doReturn(networkDaoMock).when(dbFacadeMock).getNetworkDao(); + when(networkDaoMock.getAllForCluster(any(Guid.class))).thenReturn(Collections.<Network> emptyList()); } @Test diff --git a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java index 25ab2a9..025995d 100644 --- a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java +++ b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java @@ -644,43 +644,15 @@ if (_vds.getStatus() != VDSStatus.Up) { return; } - Map<String, Boolean> bondsWithStatus = new HashMap<String, Boolean>(); - List<Network> clusterNetworks = getDbFacade().getNetworkDao() - .getAllForCluster(_vds.getVdsGroupId()); + List<String> networks = new ArrayList<String>(); List<String> brokenNics = new ArrayList<String>(); - Map<String, List<String>> bondsWithListOfNics = new HashMap<String, List<String>>(); - - List<VdsNetworkInterface> interfaces = _vds.getInterfaces(); - Map<String, Network> networksByName = NetworkUtils.networksByName(clusterNetworks); try { - for (VdsNetworkInterface iface : interfaces) { - - // Handle nics that are non bonded and not vlan over bond - if (isRequiredInterfaceDown(networksByName, iface)) { - brokenNics.add(iface.getName()); - networks.add(iface.getNetworkName()); - } - - // Handle bond nics - if (iface.getBondName() != null) { - populate(bondsWithStatus, clusterNetworks, networks, bondsWithListOfNics, iface); - } - } - - // check the bond statuses, if one is down we set the host to down - // only if we didn't already set the host to down - if (brokenNics.isEmpty()) { - for (String key : bondsWithStatus.keySet()) { - if (!bondsWithStatus.get(key)) { - // add the nics name for audit log - for (String name : bondsWithListOfNics.get(key)) { - brokenNics.add(name); - } - } - } - } + Pair<List<String>, List<String>> problematicNics = determineProblematicNics(_vds.getInterfaces(), + getDbFacade().getNetworkDao().getAllForCluster(_vds.getVdsGroupId())); + brokenNics.addAll(problematicNics.getFirst()); + networks.addAll(problematicNics.getSecond()); } catch (Exception e) { log.error(String.format("Failure on checkInterfaces on update runtimeinfo for vds: %s", _vds.getName()), e); @@ -741,13 +713,64 @@ } } - private void populate(Map<String, Boolean> bondsWithStatus, + /** + * Determine which of the given NICs is problematic - there's a network that is required and down on the NIC or + * bond. + * + * @param interfaces + * The NICs to check. + * @param clusterNetworks + * The cluster's networks. + * @return A pair of a list of the names of the NICs which are problematic, and a list of the names of the networks + * which are required by these NICs. + */ + public static Pair<List<String>, List<String>> determineProblematicNics(List<VdsNetworkInterface> interfaces, + List<Network> clusterNetworks) { + Map<String, Boolean> bondsWithStatus = new HashMap<String, Boolean>(); + List<String> networks = new ArrayList<String>(); + List<String> brokenNics = new ArrayList<String>(); + Map<String, List<String>> bondsWithListOfNics = new HashMap<String, List<String>>(); + + Map<String, Network> networksByName = NetworkUtils.networksByName(clusterNetworks); + + for (VdsNetworkInterface iface : interfaces) { + + // Handle nics that are non bonded and not vlan over bond + if (isRequiredInterfaceDown(networksByName, interfaces, iface)) { + brokenNics.add(iface.getName()); + networks.add(iface.getNetworkName()); + } + + // Handle bond nics + if (iface.getBondName() != null) { + populate(bondsWithStatus, interfaces, clusterNetworks, networks, bondsWithListOfNics, iface); + } + } + + // check the bond statuses, if one is down we set the host to down + // only if we didn't already set the host to down + if (brokenNics.isEmpty()) { + for (String key : bondsWithStatus.keySet()) { + if (!bondsWithStatus.get(key)) { + // add the nics name for audit log + for (String name : bondsWithListOfNics.get(key)) { + brokenNics.add(name); + } + } + } + } + + return new Pair<List<String>, List<String>>(brokenNics, networks); + } + + private static void populate(Map<String, Boolean> bondsWithStatus, + List<VdsNetworkInterface> interfaces, List<Network> clusterNetworks, List<String> networks, Map<String, List<String>> bondsWithListOfNics, VdsNetworkInterface iface) { Pair<Boolean, String> retVal = - isRequiredNetworkInCluster(iface.getBondName(), clusterNetworks); + isRequiredNetworkInCluster(iface.getBondName(), interfaces, clusterNetworks); String networkName = retVal.getSecond(); if (retVal.getFirst()) { if (!bondsWithStatus.containsKey(iface.getBondName())) { @@ -780,16 +803,18 @@ * @param networksByName * @param iface */ - private boolean isRequiredInterfaceDown(Map<String, Network> networksByName, VdsNetworkInterface iface) { + private static boolean isRequiredInterfaceDown(Map<String, Network> networksByName, + List<VdsNetworkInterface> interfaces, + VdsNetworkInterface iface) { if (iface.getStatistics().getStatus() != InterfaceStatus.UP && iface.getNetworkName() != null && iface.getBonded() == null - && !isBondOrVlanOverBond(iface) + && !isBondOrVlanOverBond(iface, interfaces) && networksByName.containsKey(iface.getNetworkName())) { Network net = networksByName.get(iface.getNetworkName()); if (net.getCluster().getStatus() == NetworkStatus.OPERATIONAL && net.getCluster().isRequired() - && (iface.getVlanId() == null || !isVlanInterfaceUp(iface))) { + && (iface.getVlanId() == null || !isVlanInterfaceUp(iface, interfaces))) { return true; } } @@ -799,15 +824,18 @@ // method get bond name, list of cluster network - checks if the specified // bonds network is in the clusterNetworks, // if so return true and networkName of the bonds - private Pair<Boolean, String> isRequiredNetworkInCluster(String bondName, List<Network> clusterNetworks) { + private static Pair<Boolean, String> isRequiredNetworkInCluster(String bondName, + List<VdsNetworkInterface> interfaces, + List<Network> clusterNetworks) { Pair<Boolean, String> retVal = new Pair<Boolean, String>(); - for (VdsNetworkInterface iface : _vds.getInterfaces()) { + for (VdsNetworkInterface iface : interfaces) { if (iface.getName().equals(bondName)) { for (Network net : clusterNetworks) { // If this is the network on the bond, or on a vlan over the bond, and the network is required // we want to check this network if ((net.getName().equals(iface.getNetworkName()) - || isVlanOverBondNetwork(bondName, net.getName())) && net.getCluster().isRequired()) { + || isVlanOverBondNetwork(bondName, net.getName(), interfaces)) + && net.getCluster().isRequired()) { retVal.setFirst(true); retVal.setSecond(net.getName()); return retVal; @@ -824,7 +852,7 @@ // IsBond return true if the interface is bond, // it also check if it's vlan over bond and return true in that case // i.e. it return true in case of bond0 and bond0.5 - private boolean isBondOrVlanOverBond(VdsNetworkInterface iface) { + private static boolean isBondOrVlanOverBond(VdsNetworkInterface iface, List<VdsNetworkInterface> interfaces) { if (iface.getBonded() != null && iface.getBonded() == true) { return true; } @@ -835,7 +863,7 @@ return false; } - for (VdsNetworkInterface i : _vds.getInterfaces()) { + for (VdsNetworkInterface i : interfaces) { if (name.equals(i.getName())) { return (i.getBonded() != null && i.getBonded() == true); } @@ -848,8 +876,10 @@ // bond0 and bond0.5 // bond0 is not connectet to network just the bond0.5 is connected to network // and this method check for that case - private boolean isVlanOverBondNetwork(String bondName, String networkName) { - for (VdsNetworkInterface iface : _vds.getInterfaces()) { + private static boolean isVlanOverBondNetwork(String bondName, + String networkName, + List<VdsNetworkInterface> interfaces) { + for (VdsNetworkInterface iface : interfaces) { String name = NetworkUtils.getVlanInterfaceName(iface.getName()); // this if check if the interface is vlan if (name == null) { @@ -863,7 +893,7 @@ } // If vlan we search if the interface is up (i.e. not eth2.5 we look for eth2) - private boolean isVlanInterfaceUp(VdsNetworkInterface vlan) { + private static boolean isVlanInterfaceUp(VdsNetworkInterface vlan, List<VdsNetworkInterface> interfaces) { String[] tokens = vlan.getName().split("[.]"); if (tokens.length == 1) { // not vlan @@ -876,7 +906,7 @@ .append("."); } String ifaceName = StringUtils.stripEnd(sb.toString(), "."); - for (VdsNetworkInterface iface : _vds.getInterfaces()) { + for (VdsNetworkInterface iface : interfaces) { if (iface.getName().equals(ifaceName)) { return iface.getStatistics().getStatus() == InterfaceStatus.UP; } -- To view, visit http://gerrit.ovirt.org/14047 To unsubscribe, visit http://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I70bff4ba5541156c31fb8afdd38ff6f706c0d869 Gerrit-PatchSet: 1 Gerrit-Project: ovirt-engine Gerrit-Branch: master Gerrit-Owner: Mike Kolesnik <[email protected]> _______________________________________________ Engine-patches mailing list [email protected] http://lists.ovirt.org/mailman/listinfo/engine-patches
