Michael Kublin has uploaded a new change for review.

Change subject: core:  Auto-Recovery should check whether getVdsStats returns 
'lastCheck<60' before it proclaims host as up (#844438)
......................................................................

core:  Auto-Recovery should check whether getVdsStats returns 'lastCheck<60' 
before it proclaims host as up (#844438)

https://bugzilla.redhat.com/844438

The described bug is a special case of the regular activate-host operation.
The problem is that the condition for moving a host to NonOperational is
different from the condition for moving a host to the Active state.
The solution is to perform getVdsStats during InitVdsOnUpCommand in order to
get info about the storage domains that the host can see.
If all Active domains in the cluster are seen by the host, it will be moved to
Active; otherwise it will be left as NonOperational
with reason: STORAGE_DOMAIN_UNREACHABLE

Change-Id: Id88ef24829d5fae61dddd34f0265ae3132672783
Signed-off-by: Michael Kublin <[email protected]>
---
M 
backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitVdsOnUpCommand.java
M 
backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/AuditLogType.java
M 
backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/vdscommands/VDSCommandType.java
M 
backend/manager/modules/dal/src/main/java/org/ovirt/engine/core/dal/dbbroker/auditloghandling/AuditLogDirector.java
M 
backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties
M 
backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
6 files changed, 82 insertions(+), 7 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/31/8131/1

diff --git 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitVdsOnUpCommand.java
 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitVdsOnUpCommand.java
index 06d56bb..448d028 100644
--- 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitVdsOnUpCommand.java
+++ 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitVdsOnUpCommand.java
@@ -21,9 +21,11 @@
 import org.ovirt.engine.core.common.businessentities.VdsSpmStatus;
 import org.ovirt.engine.core.common.businessentities.storage_pool;
 import org.ovirt.engine.core.common.businessentities.gluster.GlusterServerInfo;
+import org.ovirt.engine.core.common.errors.VdcBLLException;
 import 
org.ovirt.engine.core.common.vdscommands.ConnectStoragePoolVDSCommandParameters;
 import org.ovirt.engine.core.common.vdscommands.VDSCommandType;
 import org.ovirt.engine.core.common.vdscommands.VDSReturnValue;
+import 
org.ovirt.engine.core.common.vdscommands.VdsIdAndVdsVDSCommandParametersBase;
 import org.ovirt.engine.core.common.vdscommands.VdsIdVDSCommandParametersBase;
 import 
org.ovirt.engine.core.common.vdscommands.gluster.GlusterHostAddVDSParameters;
 import org.ovirt.engine.core.compat.Guid;
@@ -33,6 +35,7 @@
 import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogDirector;
 import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogableBase;
 import org.ovirt.engine.core.dao.InterfaceDAO;
+import org.ovirt.engine.core.vdsbroker.irsbroker.IrsBrokerCommand;
 
 /**
  * Initialize Vds on its loading. For storages: First connect all storage
@@ -40,6 +43,7 @@
  *
  * After server initialized - its will be moved to Up status.
  */
+@SuppressWarnings("serial")
 @NonTransactiveCommandAttribute
 public class InitVdsOnUpCommand<T extends StoragePoolParametersBase> extends 
StorageHandlingCommandBase<T> {
     private boolean _fencingSucceeded = true;
@@ -130,10 +134,7 @@
                 _connectStorageSucceeded = true;
                 try {
                     setStoragePool(null);
-                    returnValue = _connectPoolSucceeded = Backend
-                            .getInstance()
-                            .getResourceManager()
-                            .RunVdsCommand(
+                    returnValue = _connectPoolSucceeded = runVdsCommand(
                                     VDSCommandType.ConnectStoragePool,
                                     new 
ConnectStoragePoolVDSCommandParameters(getVds().getId(), getVds()
                                             .getstorage_pool_id(), 
getVds().getvds_spm_id(), getMasterDomainIdFromDb(),
@@ -143,6 +144,13 @@
                             .getname());
                     returnValue = false;
                 }
+                if(returnValue) {
+                    returnValue = proceedVdsStats();
+                    if(!returnValue) {
+                        AuditLogDirector.log(new AuditLogableBase(getVdsId()),
+                                AuditLogType.VDS_STORAGE_VDS_STATS_FAILED);
+                    }
+                }
                 // if couldn't connect check if this is the only vds
                 // return true if connect succeeded or it's the only vds
                 if (!returnValue && suppressCheck) {
@@ -155,6 +163,26 @@
         return returnValue;
     }
 
+    protected boolean proceedVdsStats() {
+        boolean returnValue = true;
+        try {
+            runVdsCommand(VDSCommandType.GetStats, new 
VdsIdAndVdsVDSCommandParametersBase(getVds()));
+            if 
(IrsBrokerCommand.isDomainsReportedAsProblematic(getVds().getstorage_pool_id(), 
getVds().getDomains())) {
+                log.errorFormat("One of the domains of host {0} in pool {1} is 
problematic",
+                        getVds().getvds_name(),
+                        getStoragePool()
+                                .getname());
+                returnValue = false;
+            }
+        } catch (VdcBLLException e) {
+            log.errorFormat("Could not get vds stats for vds {0} because of 
error {1}",
+                    getVds().getvds_name(),
+                    e);
+            returnValue = false;
+        }
+        return returnValue;
+    }
+
     @Override
     public AuditLogType getAuditLogTypeValue() {
         AuditLogType type = AuditLogType.UNASSIGNED;
diff --git 
a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/AuditLogType.java
 
b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/AuditLogType.java
index 14556b5..883124b 100644
--- 
a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/AuditLogType.java
+++ 
b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/AuditLogType.java
@@ -42,6 +42,7 @@
     VDS_CPU_LOWER_THAN_CLUSTER(515),
     VDS_CPU_RETRIEVE_FAILED(516),
     VDS_STORAGE_CONNECTION_FAILED_BUT_LAST_VDS(533),
+    VDS_STORAGE_VDS_STATS_FAILED(534),
     VDS_SET_NONOPERATIONAL(517, AuditLogTimeInterval.MINUTE.getValue()),
     VDS_SET_NONOPERATIONAL_FAILED(518, AuditLogTimeInterval.MINUTE.getValue()),
     VDS_SET_NONOPERATIONAL_NETWORK(519, 
AuditLogTimeInterval.MINUTE.getValue()),
diff --git 
a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/vdscommands/VDSCommandType.java
 
b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/vdscommands/VDSCommandType.java
index 7d0d18a6..8f3ec23 100644
--- 
a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/vdscommands/VDSCommandType.java
+++ 
b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/vdscommands/VDSCommandType.java
@@ -5,6 +5,7 @@
     RemoveVds("org.ovirt.engine.core.vdsbroker"),
     ActivateVds("org.ovirt.engine.core.vdsbroker"),
     FenceVds("org.ovirt.engine.core.vdsbroker.vdsbroker"),
+    GetStats("org.ovirt.engine.core.vdsbroker.vdsbroker"),
     CreateVm("org.ovirt.engine.core.vdsbroker"),
     DestroyVm("org.ovirt.engine.core.vdsbroker"),
     Pause("org.ovirt.engine.core.vdsbroker.vdsbroker"),
diff --git 
a/backend/manager/modules/dal/src/main/java/org/ovirt/engine/core/dal/dbbroker/auditloghandling/AuditLogDirector.java
 
b/backend/manager/modules/dal/src/main/java/org/ovirt/engine/core/dal/dbbroker/auditloghandling/AuditLogDirector.java
index 3ccb016..590e079 100644
--- 
a/backend/manager/modules/dal/src/main/java/org/ovirt/engine/core/dal/dbbroker/auditloghandling/AuditLogDirector.java
+++ 
b/backend/manager/modules/dal/src/main/java/org/ovirt/engine/core/dal/dbbroker/auditloghandling/AuditLogDirector.java
@@ -263,6 +263,7 @@
         mSeverities.put(AuditLogType.VDS_ALERT_FENCING_NO_PROXY_HOST, 
AuditLogSeverity.NORMAL);
         mSeverities.put(AuditLogType.VDS_LOW_MEM, AuditLogSeverity.WARNING);
         
mSeverities.put(AuditLogType.VDS_STORAGE_CONNECTION_FAILED_BUT_LAST_VDS, 
AuditLogSeverity.ERROR);
+        mSeverities.put(AuditLogType.VDS_STORAGE_VDS_STATS_FAILED, 
AuditLogSeverity.ERROR);
         mSeverities.put(AuditLogType.VDS_LOW_DISK_SPACE, 
AuditLogSeverity.WARNING);
         mSeverities.put(AuditLogType.VDS_LOW_DISK_SPACE_ERROR, 
AuditLogSeverity.ERROR);
         mSeverities.put(AuditLogType.VDS_ACTIVATE_ASYNC, 
AuditLogSeverity.NORMAL);
diff --git 
a/backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties
 
b/backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties
index e861513..e3d958c 100644
--- 
a/backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties
+++ 
b/backend/manager/modules/dal/src/main/resources/bundles/AuditLogMessages.properties
@@ -358,6 +358,7 @@
 CPU_FLAGS_NX_IS_MISSING=Host ${VdsName} is missing the NX cpu flag. This flag 
can be enabled via the host BIOS. Please set Disable Execute (XD) for an Intel 
host, or No Execute (NX) for AMD.  Please make sure to completely power off the 
host for this change to take effect.
 VDS_CPU_RETRIEVE_FAILED=Failed to determine Host ${VdsName} CPU level - could 
not retrieve CPU flags.
 VDS_STORAGE_CONNECTION_FAILED_BUT_LAST_VDS=Failed to connect Host ${VdsName} 
to Data Center, due to connectivity errors with the Storage. Host ${VdsName} 
will remain in Up state (but inactive), as it is the last Host in the Data 
Center, to enable manual intervention by the Administrator.
+VDS_STORAGE_VDS_STATS_FAILED=Host ${VdsName} reports about one of the active 
domains as problematic.
 VDS_SET_NONOPERATIONAL=Host ${VdsName} moved to Non-Operational state.
 VDS_SET_NONOPERATIONAL_FAILED=Failed to move Host ${VdsName} to 
Non-Operational state.
 VDS_FENCE_STATUS=Host ${VdsName} power management was verified successfully.
diff --git 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
index 07cce29..87ac3b6 100644
--- 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
+++ 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
@@ -87,6 +87,14 @@
         }
     }
 
+    public static boolean isDomainsReportedAsProblematic(Guid storagePoolId, 
List<VDSDomainsData> vdsDomainsData) {
+        IrsProxyData proxy = _irsProxyData.get(storagePoolId);
+        if (proxy != null) {
+            return proxy.isDomainsReportedAsProblematic(vdsDomainsData);
+        }
+        return false;
+    }
+
     public static void lockDbSave(Guid storagePoolId) {
         IrsProxyData proxy = _irsProxyData.get(storagePoolId);
         if (proxy != null) {
@@ -1098,13 +1106,13 @@
                     List<Guid> domainsSeenByVdsInProblem = new 
ArrayList<Guid>();
                     for (VDSDomainsData tempData : data) {
                         if (domainsInPool.contains(tempData.getDomainId())) {
-                            if (isDomainReportedAsProblematic(tempData)) {
+                            if (isDomainReportedAsProblematic(tempData, 
false)) {
                                 
domainsSeenByVdsInProblem.add(tempData.getDomainId());
                             } else if (tempData.getDelay() > Config.<Double> 
GetValue(ConfigValues.MaxStorageVdsDelayCheckSec)) {
                                 logDelayedDomain(vdsId, tempData);
                             }
                         } else if 
(inActiveDomainsInPool.contains(tempData.getDomainId())
-                                && !isDomainReportedAsProblematic(tempData)) {
+                                && !isDomainReportedAsProblematic(tempData, 
false)) {
                             log.warnFormat("Storage {0} was reported by vds 
{1} as active in pool {2}, moving to active status",
                                     tempData.getDomainId(),
                                     vdsName,
@@ -1154,12 +1162,47 @@
                     AuditLogType.VDS_DOMAIN_DELAY_INTERVAL);
         }
 
-        private boolean isDomainReportedAsProblematic(VDSDomainsData tempData) 
{
+        public boolean isDomainsReportedAsProblematic(List<VDSDomainsData> 
vdsDomainsData) {
+            Set<Guid> domainsInPool = new HashSet<Guid>(
+                    
DbFacade.getInstance().getStorageDomainStaticDAO().getAllIds(
+                            _storagePoolId, StorageDomainStatus.Active));
+            
domainsInPool.addAll(DbFacade.getInstance().getStorageDomainStaticDAO().getAllIds(
+                    _storagePoolId, StorageDomainStatus.Unknown));
+            List<Guid> domainWhicWereSeen = new ArrayList<Guid>();
+            for (VDSDomainsData vdsDomainData : vdsDomainsData) {
+                if (domainsInPool.contains(vdsDomainData.getDomainId())) {
+                    if (isDomainReportedAsProblematic(vdsDomainData, true)) {
+                        return true;
+                    }
+                    domainWhicWereSeen.add(vdsDomainData.getDomainId());
+                }
+            }
+            domainsInPool.removeAll(domainWhicWereSeen);
+            if (domainsInPool.size() > 0) {
+                for (Guid domainId : domainsInPool) {
+                    log.errorFormat("Domain {0} was not seen by host", 
domainId);
+                }
+                return true;
+            }
+            return false;
+        }
+
+        private boolean isDomainReportedAsProblematic(VDSDomainsData tempData, 
boolean isLog) {
             if (tempData.getCode() != 0) {
+                if (isLog) {
+                    log.errorFormat("Domain {0} was reported with error code 
{1}",
+                            tempData.getDomainId(),
+                            tempData.getCode());
+                }
                 return true;
             }
             if (tempData.getLastCheck() > Config
                     .<Double> 
GetValue(ConfigValues.MaxStorageVdsTimeoutCheckSec)) {
+                if (isLog) {
+                    log.errorFormat("Domain {0} was reported with too big 
lastCheck {1}",
+                            tempData.getDomainId(),
+                            tempData.getLastCheck());
+                }
                 return true;
             }
             return false;


--
To view, visit http://gerrit.ovirt.org/8131
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id88ef24829d5fae61dddd34f0265ae3132672783
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-engine
Gerrit-Branch: master
Gerrit-Owner: Michael Kublin <[email protected]>
_______________________________________________
Engine-patches mailing list
[email protected]
http://lists.ovirt.org/mailman/listinfo/engine-patches

Reply via email to