Kanagaraj M has uploaded a new change for review. Change subject: gluster: check gluster daemon status in sync-job ......................................................................
gluster: check gluster daemon status in sync-job In sync-job, the peer list is always retrieved from the first UP server. There are chances that the gluster daemon on other servers may be down. In this case, the peer status for those servers will be DISCONNECTED. When a host is found in UP state in the database and its peer status is 'DISCONNECTED', that host will be moved to Non-Operational status. The 'peer status' command will be executed on that server to make sure the gluster daemon is actually down before moving it to Non-Operational. Change-Id: I3e0f661cb496e741f7c06df42ce3b55037a87e28 Bug-Url: https://bugzilla.redhat.com/1056997 Signed-off-by: Kanagaraj M <[email protected]> --- M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/gluster/GlusterSyncJob.java 1 file changed, 34 insertions(+), 18 deletions(-) git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/37/23737/1 diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/gluster/GlusterSyncJob.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/gluster/GlusterSyncJob.java index 234ea3e..d307a17 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/gluster/GlusterSyncJob.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/gluster/GlusterSyncJob.java @@ -28,15 +28,18 @@ import org.ovirt.engine.core.common.businessentities.gluster.GlusterVolumeAdvancedDetails; import org.ovirt.engine.core.common.businessentities.gluster.GlusterVolumeEntity; import org.ovirt.engine.core.common.businessentities.gluster.GlusterVolumeOptionEntity; +import org.ovirt.engine.core.common.businessentities.gluster.PeerStatus; import org.ovirt.engine.core.common.businessentities.gluster.TransportType; import org.ovirt.engine.core.common.businessentities.network.VdsNetworkInterface; import org.ovirt.engine.core.common.constants.gluster.GlusterConstants; +import org.ovirt.engine.core.common.errors.VdcBllErrors; import 
org.ovirt.engine.core.common.gluster.GlusterFeatureSupported; import org.ovirt.engine.core.common.utils.ListUtils; import org.ovirt.engine.core.common.utils.gluster.GlusterCoreUtil; import org.ovirt.engine.core.common.vdscommands.RemoveVdsVDSCommandParameters; import org.ovirt.engine.core.common.vdscommands.VDSCommandType; import org.ovirt.engine.core.common.vdscommands.VDSReturnValue; +import org.ovirt.engine.core.common.vdscommands.VdsIdVDSCommandParametersBase; import org.ovirt.engine.core.common.vdscommands.gluster.GlusterVolumeAdvancedDetailsVDSParameters; import org.ovirt.engine.core.common.vdscommands.gluster.GlusterVolumesListVDSParameters; import org.ovirt.engine.core.compat.Guid; @@ -48,6 +51,7 @@ import org.ovirt.engine.core.utils.timer.OnTimerMethodAnnotation; import org.ovirt.engine.core.utils.transaction.TransactionMethod; import org.ovirt.engine.core.utils.transaction.TransactionSupport; +import org.ovirt.engine.core.vdsbroker.ResourceManager; /** * This class is responsible for keeping the Gluster related data of engine in sync with the actual data retrieved from @@ -148,17 +152,30 @@ boolean serverRemoved = false; for (VDS server : existingServers) { - if (isRemovableStatus(server.getStatus()) && serverDetached(server, fetchedServers)) { - log.infoFormat("Server {0} has been removed directly using the gluster CLI. 
Removing it from engine as well.", - server.getName()); - logUtil.logServerMessage(server, AuditLogType.GLUSTER_SERVER_REMOVED_FROM_CLI); - try (EngineLock lock = getGlusterUtil().acquireGlusterLockWait(server.getId())) { - removeServerFromDb(server); - // remove the server from resource manager - runVdsCommand(VDSCommandType.RemoveVds, new RemoveVdsVDSCommandParameters(server.getId())); - serverRemoved = true; - } catch (Exception e) { - log.errorFormat("Error while removing server {0} from database!", server.getName(), e); + + if (isRemovableStatus(server.getStatus())) { + GlusterServerInfo glusterServer = findGlusterServer(server, fetchedServers); + if (glusterServer == null) { + log.infoFormat("Server {0} has been removed directly using the gluster CLI. Removing it from engine as well.", + server.getName()); + logUtil.logServerMessage(server, AuditLogType.GLUSTER_SERVER_REMOVED_FROM_CLI); + try (EngineLock lock = getGlusterUtil().acquireGlusterLockWait(server.getId())) { + removeServerFromDb(server); + // remove the server from resource manager + runVdsCommand(VDSCommandType.RemoveVds, new RemoveVdsVDSCommandParameters(server.getId())); + serverRemoved = true; + } catch (Exception e) { + log.errorFormat("Error while removing server {0} from database!", server.getName(), e); + } + } + else if (server.getStatus() == VDSStatus.Up && glusterServer.getStatus() == PeerStatus.DISCONNECTED) { + // check gluster is running, if down then move the host to Non-Operational + VDSReturnValue returnValue = ResourceManager.getInstance().runVdsCommand(VDSCommandType.GlusterServersList, + new VdsIdVDSCommandParametersBase(server.getId())); + if (!returnValue.getSucceeded() + && returnValue.getVdsError().getCode() == VdcBllErrors.GlusterCmdExecFailedException) { + setNonOperational(server); + } } } } @@ -202,32 +219,31 @@ } /** - * Returns true if the given server has been detached i.e. cannot be found in the list of fetched servers. 
+ * Returns the equivalent GlusterServer from the list of fetched servers. * * @param server * @param fetchedServers - * @return + * @return GlusterServerInfo */ - private boolean serverDetached(VDS server, List<GlusterServerInfo> fetchedServers) { + private GlusterServerInfo findGlusterServer(VDS server, List<GlusterServerInfo> fetchedServers) { if (GlusterFeatureSupported.glusterHostUuidSupported(server.getVdsGroupCompatibilityVersion())) { // compare gluster host uuid stored in server with the ones fetched from list GlusterServer glusterServer = getGlusterServerDao().getByServerId(server.getId()); for (GlusterServerInfo fetchedServer : fetchedServers) { if (fetchedServer.getUuid().equals(glusterServer.getGlusterServerUuid())) { - return false; + return fetchedServer; } } - return true; } else { List<String> vdsIps = getVdsIps(server); for (GlusterServerInfo fetchedServer : fetchedServers) { if (fetchedServer.getHostnameOrIp().equals(server.getHostName()) || vdsIps.contains(fetchedServer.getHostnameOrIp())) { - return false; + return fetchedServer; } } - return true; } + return null; } private List<String> getVdsIps(VDS vds) { -- To view, visit http://gerrit.ovirt.org/23737 To unsubscribe, visit http://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I3e0f661cb496e741f7c06df42ce3b55037a87e28 Gerrit-PatchSet: 1 Gerrit-Project: ovirt-engine Gerrit-Branch: master Gerrit-Owner: Kanagaraj M <[email protected]> _______________________________________________ Engine-patches mailing list [email protected] http://lists.ovirt.org/mailman/listinfo/engine-patches
