Federico Simoncelli has uploaded a new change for review.

Change subject: vdsbroker: improve the domain visibility cache
......................................................................

vdsbroker: improve the domain visibility cache

In order to support more informed decisions, the previous domain status
cache (_domainsInProblem) has been refactored in DomainVisibility.

For each domain in the pool (also the ones in maintenance) it is kept a
map of vds and the Status:

 ACTIVE: the domain is active in the database and in the vds (it is
     reachable, monitored and reported)
 INACTIVE: the domain is marked as in maintenance in the database and
     inactive in the vds (not monitored)
 UNREPORTED: the domain is active in the database but the vds is not
    reporting it (not monitored)
 UNREACHABLE: the domain is active in the database but the vds cannot
    reach it
 UNKNOWN: no status or information is available

For memory usage optimization the DomainVisibility maps are not keeping
the ACTIVE status.

Change-Id: Id74f12d108f75924fbff28409a89bc1caff05908
Signed-off-by: Federico Simoncelli <[email protected]>
---
M 
backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
A 
backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/DomainVisibility.java
M 
backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
3 files changed, 152 insertions(+), 198 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/70/23370/1

diff --git 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
index a8f5bbf..3b4a268 100644
--- 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
+++ 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
@@ -281,9 +281,7 @@
 
                 // Now update the status of domains, this code should not be in
                 // synchronized part of code
-                if (domainsList != null) {
-                    IrsBrokerCommand.updateVdsDomainsData(tmpVds, 
storagePoolId, domainsList);
-                }
+                IrsBrokerCommand.updateVdsDomainsVisibility(tmpVds, 
domainsList);
             } catch (Exception e) {
                 log.error("Timer update runtimeinfo failed. Exception:", e);
             } finally {
diff --git 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/DomainVisibility.java
 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/DomainVisibility.java
new file mode 100644
index 0000000..39895ad
--- /dev/null
+++ 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/DomainVisibility.java
@@ -0,0 +1,79 @@
+package org.ovirt.engine.core.vdsbroker.irsbroker;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.ovirt.engine.core.compat.Guid;
+
+
+public class DomainVisibility {
+
+    public static enum Status {
+        ACTIVE,
+        INACTIVE,
+        UNREPORTED,
+        UNREACHABLE,
+        UNKNOWN;
+
+        public boolean isProblematic() {
+            return this == UNREPORTED || this == UNREACHABLE || this == 
UNKNOWN;
+        }
+    }
+
+    private Map<Guid, Map<Guid, Status>> domainsVisibility;
+
+    public DomainVisibility() {
+        this.domainsVisibility = new HashMap<>();
+    }
+
+    public void clear() {
+        this.domainsVisibility.clear();
+    }
+
+    public synchronized void updatePoolDomains(Set<Guid> domains) {
+        for (Guid domainId : domains) {
+            if (!domainsVisibility.containsKey(domainId)) {
+                domainsVisibility.put(domainId, new HashMap<Guid, Status>());
+            }
+        }
+
+        Iterator<Guid> domainsIterator = domainsVisibility.keySet().iterator();
+        while (domainsIterator.hasNext()) {
+            if (!domains.contains(domainsIterator.next())) {
+                domainsIterator.remove();
+            }
+        }
+    }
+
+    public synchronized void setVdsDomainStatus(Guid vdsId, Guid domainId, 
Status status) {
+        Map<Guid, Status> domainVdsStatus = domainsVisibility.get(domainId);
+
+        if (status == Status.ACTIVE || status == Status.UNKNOWN) {
+            domainVdsStatus.remove(vdsId);
+        } else {
+            domainVdsStatus.put(vdsId, status);
+        }
+    }
+
+    public synchronized void removeVdsStatuses(Guid vdsId) {
+        for (Map<Guid, Status> domainVdsStatus : domainsVisibility.values()) {
+            domainVdsStatus.remove(vdsId);
+        }
+    }
+
+    public synchronized HashSet<Guid> getDomainProblematicVdsList(Guid 
domainId) {
+        HashSet<Guid> vdsList = new HashSet<>();
+        Map<Guid, Status> domainVdsStatus = domainsVisibility.get(domainId);
+
+        for (Entry<Guid, Status> status : domainVdsStatus.entrySet()) {
+            if (status.getValue().isProblematic())
+                vdsList.add(status.getKey());
+        }
+
+        return vdsList;
+    }
+}
diff --git 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
index 10380d9..9957bb8 100644
--- 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
+++ 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
@@ -4,11 +4,9 @@
 import java.net.SocketException;
 import java.text.MessageFormat;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -90,16 +88,12 @@
     /**
      * process received domain monitoring information from a given vds if 
necessary (according to it's status).
      * @param vds
-     * @param storagePoolId
      * @param vdsDomainData
      */
-    public static void updateVdsDomainsData(VDS vds, Guid storagePoolId,
-            ArrayList<VDSDomainsData> vdsDomainData) {
-        if (vds.getStatus() == VDSStatus.Up) {
-            IrsProxyData proxy = _irsProxyData.get(storagePoolId);
-            if (proxy != null) {
-                proxy.updateVdsDomainsData(vds.getId(), vds.getName(), 
vdsDomainData);
-            }
+    public static void updateVdsDomainsVisibility(VDS vds, 
ArrayList<VDSDomainsData> vdsDomainData) {
+        IrsProxyData proxy = _irsProxyData.get(vds.getStoragePoolId());
+        if (proxy != null) {
+            proxy.updateDomainsVisibility(vds, vdsDomainData);
         }
     }
 
@@ -1001,111 +995,64 @@
             mCurrentVdsId = null;
         }
 
-        private final Map<Guid, HashSet<Guid>> _domainsInProblem = new 
ConcurrentHashMap<Guid, HashSet<Guid>>();
-        private final Map<Guid, String> _timers = new HashMap<Guid, String>();
+        private final Map<Guid, String> _timers = new HashMap<>();
+        private final DomainVisibility domainsVisibility = new 
DomainVisibility();
 
-        public void updateVdsDomainsData(final Guid vdsId, final String 
vdsName,
-                final ArrayList<VDSDomainsData> data) {
+        private static Map<Guid, VDSDomainsData> 
getDataDomainsMap(ArrayList<VDSDomainsData> domainsDataList) {
+            Map<Guid, VDSDomainsData> dataDomainMap = new HashMap<>();
+            for (VDSDomainsData domainsData : domainsDataList) {
+                dataDomainMap.put(domainsData.getDomainId(), domainsData);
+            }
+            return dataDomainMap;
+        }
 
-            Set<Guid> domainsInProblems = null;
-            StoragePool storagePool =
-                    
DbFacade.getInstance().getStoragePoolDao().get(_storagePoolId);
-            if (storagePool != null
-                    && (storagePool.getStatus() == StoragePoolStatus.Up || 
storagePool.getStatus() == StoragePoolStatus.NonResponsive)) {
+        private static Map<Guid, StorageDomain> getPoolDomainsMap(Guid 
storagePoolId) {
+            Map<Guid, StorageDomain> poolDomainMap = new HashMap<>();
+            for (StorageDomain poolDomain :
+                    
DbFacade.getInstance().getStorageDomainDao().getAllForStoragePool(storagePoolId))
 {
+                poolDomainMap.put(poolDomain.getId(), poolDomain);
+            }
+            return poolDomainMap;
+        }
 
-                try {
-                    // build a list of all domains in pool
-                    // which are in status Active or Unknown
-                    Set<Guid> domainsInPool = new HashSet<Guid>(
-                            
DbFacade.getInstance().getStorageDomainStaticDao().getAllIds(
-                                    _storagePoolId, 
StorageDomainStatus.Active));
-                    
domainsInPool.addAll(DbFacade.getInstance().getStorageDomainStaticDao().getAllIds(
-                            _storagePoolId, StorageDomainStatus.Unknown));
-                    Set<Guid> inActiveDomainsInPool =
-                            new HashSet<Guid>(DbFacade.getInstance()
-                                    .getStorageDomainStaticDao()
-                                    .getAllIds(_storagePoolId, 
StorageDomainStatus.InActive));
+        public void updateDomainsVisibility(VDS vds, ArrayList<VDSDomainsData> 
domainsDataList) {
+            // Step 1: return if there's no data to analyze and remove the vds 
if it is in Maintenance.
+            // It's not correct to remove the NonOperational hosts because 
they may be still connected
+            // to the pool. They must be moved to Maintenance.
+            if (domainsDataList == null) {
+                if (vds.getStatus() == VDSStatus.Maintenance) {
+                    domainsVisibility.removeVdsStatuses(vds.getId());
+                }
+                return;
+            }
 
-                    // build a list of all the domains in
-                    // pool (domainsInPool) that are not
-                    // visible by the host.
-                    List<Guid> domainsInPoolThatNonVisibleByVds = new 
ArrayList<Guid>();
-                    Set<Guid> dataDomainIds = new HashSet<Guid>();
-                    for (VDSDomainsData tempData : data) {
-                        dataDomainIds.add(tempData.getDomainId());
-                    }
-                    for (Guid tempDomainId : domainsInPool) {
-                        if (!dataDomainIds.contains(tempDomainId)) {
-                            domainsInPoolThatNonVisibleByVds.add(tempDomainId);
-                        }
-                    }
+            // Step 2: build the domain maps for the reported domains and the 
pool domains
+            Map<Guid, VDSDomainsData> dataDomainMap = 
getDataDomainsMap(domainsDataList);
+            Map<Guid, StorageDomain> poolDomainMap = 
getPoolDomainsMap(_storagePoolId);
 
-                    // build a list of domains that the host
-                    // reports as in problem (code!=0) or (code==0
-                    // && lastChecl >
-                    // ConfigValues.MaxStorageVdsTimeoutCheckSec)
-                    // and are contained in the Active or
-                    // Unknown domains in pool
-                    List<Guid> domainsSeenByVdsInProblem = new 
ArrayList<Guid>();
-                    for (VDSDomainsData tempData : data) {
-                        if (domainsInPool.contains(tempData.getDomainId())) {
-                            if (isDomainReportedAsProblematic(tempData, 
false)) {
-                                
domainsSeenByVdsInProblem.add(tempData.getDomainId());
-                            } else if (tempData.getDelay() > Config.<Double> 
getValue(ConfigValues.MaxStorageVdsDelayCheckSec)) {
-                                logDelayedDomain(vdsId, tempData);
-                            }
-                        } else if 
(inActiveDomainsInPool.contains(tempData.getDomainId())
-                                && !isDomainReportedAsProblematic(tempData, 
false)) {
-                            log.warnFormat("Storage Domain {0} was reported by 
Host {1} as Active in Pool {2}, moving to active status",
-                                    getDomainIdTuple(tempData.getDomainId()),
-                                    vdsName,
-                                    _storagePoolId);
-                            StoragePoolIsoMap map =
-                                    DbFacade.getInstance()
-                                            .getStoragePoolIsoMapDao()
-                                            .get(new 
StoragePoolIsoMapId(tempData.getDomainId(), _storagePoolId));
-                            map.setStatus(StorageDomainStatus.Active);
-                            
DbFacade.getInstance().getStoragePoolIsoMapDao().update(map);
-                        }
-                    }
+            domainsVisibility.updatePoolDomains(poolDomainMap.keySet());
 
-                    // build a list of all potential domains
-                    // in problem
-                    domainsInProblems = new HashSet<Guid>();
-                    domainsInProblems.addAll(domainsInPoolThatNonVisibleByVds);
-                    domainsInProblems.addAll(domainsSeenByVdsInProblem);
+            for (StorageDomain poolDomain : poolDomainMap.values()) {
+                DomainVisibility.Status domainStatus;
+                VDSDomainsData domainData = 
dataDomainMap.get(poolDomain.getId());
 
-                } catch (RuntimeException ex) {
-                    log.error("error in updateVdsDomainsData", ex);
+                if (domainData == null) {
+                    domainStatus = poolDomain.getStatus() == 
StorageDomainStatus.Active ?
+                            DomainVisibility.Status.UNREPORTED : 
DomainVisibility.Status.INACTIVE;
+                } else {
+                    domainStatus = isDomainReportedAsProblematic(domainData, 
false) ?
+                            DomainVisibility.Status.UNREACHABLE : 
DomainVisibility.Status.ACTIVE;
                 }
 
-            }
-            updateDomainInProblem(vdsId, vdsName, domainsInProblems);
-        }
+                domainsVisibility.setVdsDomainStatus(vds.getId(), 
poolDomain.getId(), domainStatus);
 
-        private void updateDomainInProblem(final Guid vdsId, final String 
vdsName, final Set<Guid> domainsInProblems) {
-            if (domainsInProblems != null) {
-                ((EventQueue) EjbUtils.findBean(BeanType.EVENTQUEUE_MANAGER, 
BeanProxyType.LOCAL)).submitEventSync(new Event(_storagePoolId,
-                        null, vdsId, EventType.DOMAINMONITORING, ""),
-                        new Callable<EventResult>() {
-                            @Override
-                            public EventResult call() {
-                                EventResult result = new EventResult(true, 
EventType.DOMAINMONITORING);
-                                updateProblematicVdsData(vdsId, vdsName, 
domainsInProblems);
-                                return result;
-                            }
-                        });
+                if (domainStatus == DomainVisibility.Status.UNREPORTED
+                        || domainStatus == 
DomainVisibility.Status.UNREACHABLE) {
+                    setTimerIfAbsent(poolDomain.getId());
+                } else if 
(domainsVisibility.getDomainProblematicVdsList(poolDomain.getId()).size() == 0) 
{
+                    clearTimer(poolDomain.getId());
+                }
             }
-        }
-
-        private void logDelayedDomain(final Guid vdsId, VDSDomainsData 
tempData) {
-            AuditLogableBase logable = new AuditLogableBase();
-            logable.setVdsId(vdsId);
-            logable.setStorageDomainId(tempData.getDomainId());
-            logable.addCustomValue("Delay",
-                    Double.toString(tempData.getDelay()));
-            AuditLogDirector.log(logable,
-                    AuditLogType.VDS_DOMAIN_DELAY_INTERVAL);
         }
 
         private List<Guid> 
obtainDomainsReportedAsProblematic(List<VDSDomainsData> vdsDomainsData) {
@@ -1155,55 +1102,6 @@
             return false;
         }
 
-        private void updateProblematicVdsData(final Guid vdsId, final String 
vdsName, Set<Guid> domainsInProblems) {
-            // for all problematic domains
-            // update cache of _domainsInProblem
-            // and _vdssInProblem and add a new
-            // timer for new domains in problem
-            Set<Guid> domainsInProblemKeySet = _domainsInProblem.keySet();
-            for (Guid domainId : domainsInProblems) {
-                if (domainsInProblemKeySet.contains(domainId)) {
-                    // existing domains in problem
-                    updateDomainInProblemData(domainId, vdsId, vdsName);
-                } else {
-                    // new domains in problems
-                    addDomainInProblemData(domainId, vdsId, vdsName);
-                }
-            }
-            Set<Guid> notReportedDomainsByHost = new 
HashSet<Guid>(_domainsInProblem.keySet());
-            notReportedDomainsByHost.removeAll(domainsInProblems);
-            for (Guid domainId : notReportedDomainsByHost) {
-                Set<Guid> vdsForDomain = _domainsInProblem.get(domainId);
-                if (vdsForDomain != null && vdsForDomain.contains(vdsId)) {
-                    domainRecoveredFromProblem(domainId, vdsId, vdsName);
-                }
-            }
-        }
-
-        private void domainRecoveredFromProblem(Guid domainId, Guid vdsId, 
String vdsName) {
-            String domainIdTuple = getDomainIdTuple(domainId);
-            log.infoFormat("Domain {0} recovered from problem. vds: {1}", 
domainIdTuple, vdsName);
-            _domainsInProblem.get(domainId).remove(vdsId);
-            if (_domainsInProblem.get(domainId).size() == 0) {
-                log.infoFormat("Domain {0} has recovered from problem. No 
active host in the DC is reporting it as" +
-                        " problematic, so clearing the domain recovery 
timer.", domainIdTuple);
-                _domainsInProblem.remove(domainId);
-                clearTimer(domainId);
-            }
-        }
-
-        private void addDomainInProblemData(Guid domainId, Guid vdsId, String 
vdsName) {
-            _domainsInProblem.put(domainId, new 
java.util.HashSet<Guid>(java.util.Arrays.asList(vdsId)));
-            log.warnFormat("domain {0} in problem. vds: {1}", 
getDomainIdTuple(domainId), vdsName);
-            Class[] inputType = new Class[] { Guid.class };
-            Object[] inputParams = new Object[] { domainId };
-            String jobId = 
SchedulerUtilQuartzImpl.getInstance().scheduleAOneTimeJob(this, "onTimer", 
inputType,
-                    inputParams, Config.<Integer> 
getValue(ConfigValues.StorageDomainFalureTimeoutInMinutes),
-                    TimeUnit.MINUTES);
-            clearTimer(domainId);
-            _timers.put(domainId, jobId);
-        }
-
         @OnTimerMethodAnnotation("onTimer")
         public void onTimer(final Guid domainId) {
             ((EventQueue) EjbUtils.findBean(BeanType.EVENTQUEUE_MANAGER, 
BeanProxyType.LOCAL)).submitEventAsync(new Event(_storagePoolId,
@@ -1212,7 +1110,7 @@
                         @Override
                         public EventResult call() {
                             EventResult result = null;
-                            if (_domainsInProblem.containsKey(domainId)) {
+                            if 
(domainsVisibility.getDomainProblematicVdsList(domainId).size() > 0) {
                                 log.info("starting processDomainRecovery for 
domain " + getDomainIdTuple(domainId));
                                 result = processDomainRecovery(domainId);
                             }
@@ -1220,11 +1118,6 @@
                             return result;
                         }
                     });
-        }
-
-        private void updateDomainInProblemData(Guid domainId, Guid vdsId, 
String vdsName) {
-            log.debugFormat("domain {0} still in problem. vds: {1}", 
getDomainIdTuple(domainId), vdsName);
-            _domainsInProblem.get(domainId).add(vdsId);
         }
 
         private EventResult processDomainRecovery(final Guid domainId) {
@@ -1245,7 +1138,7 @@
             // on this domain as in problem.
             // Mark the above list as hosts we suspect are in
             // problem.
-            Set<Guid> hostsThatReportedDomainAsInProblem = 
_domainsInProblem.get(domainId);
+            Set<Guid> hostsThatReportedDomainAsInProblem = 
domainsVisibility.getDomainProblematicVdsList(domainId);
             List<Guid> vdssInProblem = new ArrayList<Guid>();
             for (Guid tempVDSId : vdssInPool) {
                 if (!hostsThatReportedDomainAsInProblem.contains(tempVDSId)) {
@@ -1269,7 +1162,7 @@
                     // Moving all the hosts which reported on
                     // this domain as in problem to non
                     // operational.
-                    for (final Guid vdsId : _domainsInProblem.get(domainId)) {
+                    for (final Guid vdsId : 
domainsVisibility.getDomainProblematicVdsList(domainId)) {
                         VDS vds = vdsMap.get(vdsId);
                         if (vds == null) {
                             log.warnFormat(
@@ -1328,9 +1221,23 @@
                 }
             }
 
-            // clear from cache of _domainsInProblem
-            clearDomainFromCache(domainId, nonOpVdss);
             return result;
+        }
+
+        private void setTimerIfAbsent(Guid domainId) {
+            synchronized (_timers) {
+                if (_timers.containsKey(domainId))
+                    return;
+
+                String jobId = 
SchedulerUtilQuartzImpl.getInstance().scheduleAOneTimeJob(
+                        this, "onTimer",
+                        new Class[] { Guid.class },
+                        new Object[] { domainId },
+                        Config.<Integer> 
getValue(ConfigValues.StorageDomainFalureTimeoutInMinutes),
+                        TimeUnit.MINUTES);
+
+                _timers.put(domainId, jobId);
+            }
         }
 
         /**
@@ -1347,43 +1254,13 @@
         }
 
         /**
-         * clears the problematic domain from the vdss that reported on this 
domain as problematic and from the domains
-         * in problem
-         *
-         * @param domainId
-         *            - the domain to clear cache for.
-         * @param nonOpVdss - passed vdss that non operational
-         */
-        private void clearDomainFromCache(Guid domainId, List<Guid> nonOpVdss) 
{
-            if (domainId != null) {
-                _domainsInProblem.remove(domainId);
-            }
-            removeVdsAsProblematic(nonOpVdss);
-        }
-
-        private void removeVdsAsProblematic(List<Guid> nonOpVdss) {
-            Iterator<Map.Entry<Guid, HashSet<Guid>>> iterDomainsInProblem = 
_domainsInProblem.entrySet().iterator();
-            while (iterDomainsInProblem.hasNext()) {
-                Map.Entry<Guid, HashSet<Guid>> entry = 
iterDomainsInProblem.next();
-                entry.getValue().removeAll(nonOpVdss);
-                if (entry.getValue().isEmpty()) {
-                    iterDomainsInProblem.remove();
-                    clearTimer(entry.getKey());
-                    log.infoFormat("Domain {0} has recovered from problem. No 
active host in the DC is reporting it as poblematic, so clearing the domain 
recovery timer.",
-                            getDomainIdTuple(entry.getKey()));
-                }
-
-            }
-        }
-
-        /**
          * deletes all the jobs for the domains in the pool and clears the 
problematic entities caches.
          */
         public void clearCache() {
             log.info("clearing cache for problematic entities in pool " + 
_storagePoolId);
             // clear lists
             _timers.clear();
-            _domainsInProblem.clear();
+            domainsVisibility.clear();
         }
 
         public void clearPoolTimers() {
@@ -1410,7 +1287,7 @@
             log.infoFormat("Clearing cache of pool: {0} for problematic 
entities of VDS: {1}.",
                     _storagePoolId, vdsName);
 
-            clearDomainFromCache(null, Arrays.asList(vdsId));
+            domainsVisibility.removeVdsStatuses(vdsId);
         }
 
         private boolean _disposed;


-- 
To view, visit http://gerrit.ovirt.org/23370
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id74f12d108f75924fbff28409a89bc1caff05908
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-engine
Gerrit-Branch: master
Gerrit-Owner: Federico Simoncelli <[email protected]>
_______________________________________________
Engine-patches mailing list
[email protected]
http://lists.ovirt.org/mailman/listinfo/engine-patches

Reply via email to