Federico Simoncelli has uploaded a new change for review.
Change subject: vdsbroker: improve the domain visibility cache
......................................................................
vdsbroker: improve the domain visibility cache
In order to support more informed decisions, the previous domain status
cache (_domainsInProblem) has been refactored in DomainVisibility.
For each domain in the pool (also the ones in maintenance) it is kept a
map of vds and the Status:
ACTIVE: the domain is active in the database and in the vds (it is
reachable, monitored and reported)
INACTIVE: the domain is marked as in maintenance in the database and
inactive in the vds (not monitored)
UNREPORTED: the domain is active in the database but the vds is not
reporting it (not monitored)
UNREACHABLE: the domain is active in the database but the vds cannot
reach it
UNKNOWN: no status or information is available
For memory usage optimization the DomainVisibility maps are not keeping
the ACTIVE status.
Change-Id: Id74f12d108f75924fbff28409a89bc1caff05908
Signed-off-by: Federico Simoncelli <[email protected]>
---
M
backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
A
backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/DomainVisibility.java
M
backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
3 files changed, 152 insertions(+), 198 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/70/23370/1
diff --git
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
index a8f5bbf..3b4a268 100644
---
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
+++
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
@@ -281,9 +281,7 @@
// Now update the status of domains, this code should not be in
// synchronized part of code
- if (domainsList != null) {
- IrsBrokerCommand.updateVdsDomainsData(tmpVds,
storagePoolId, domainsList);
- }
+ IrsBrokerCommand.updateVdsDomainsVisibility(tmpVds,
domainsList);
} catch (Exception e) {
log.error("Timer update runtimeinfo failed. Exception:", e);
} finally {
diff --git
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/DomainVisibility.java
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/DomainVisibility.java
new file mode 100644
index 0000000..39895ad
--- /dev/null
+++
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/DomainVisibility.java
@@ -0,0 +1,79 @@
+package org.ovirt.engine.core.vdsbroker.irsbroker;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.ovirt.engine.core.compat.Guid;
+
+
+public class DomainVisibility {
+
+ public static enum Status {
+ ACTIVE,
+ INACTIVE,
+ UNREPORTED,
+ UNREACHABLE,
+ UNKNOWN;
+
+ public boolean isProblematic() {
+ return this == UNREPORTED || this == UNREACHABLE || this ==
UNKNOWN;
+ }
+ }
+
+ private Map<Guid, Map<Guid, Status>> domainsVisibility;
+
+ public DomainVisibility() {
+ this.domainsVisibility = new HashMap<>();
+ }
+
+ public void clear() {
+ this.domainsVisibility.clear();
+ }
+
+ public synchronized void updatePoolDomains(Set<Guid> domains) {
+ for (Guid domainId : domains) {
+ if (!domainsVisibility.containsKey(domainId)) {
+ domainsVisibility.put(domainId, new HashMap<Guid, Status>());
+ }
+ }
+
+ Iterator<Guid> domainsIterator = domainsVisibility.keySet().iterator();
+ while (domainsIterator.hasNext()) {
+ if (!domains.contains(domainsIterator.next())) {
+ domainsIterator.remove();
+ }
+ }
+ }
+
+ public synchronized void setVdsDomainStatus(Guid vdsId, Guid domainId,
Status status) {
+ Map<Guid, Status> domainVdsStatus = domainsVisibility.get(domainId);
+
+ if (status == Status.ACTIVE || status == Status.UNKNOWN) {
+ domainVdsStatus.remove(vdsId);
+ } else {
+ domainVdsStatus.put(vdsId, status);
+ }
+ }
+
+ public synchronized void removeVdsStatuses(Guid vdsId) {
+ for (Map<Guid, Status> domainVdsStatus : domainsVisibility.values()) {
+ domainVdsStatus.remove(vdsId);
+ }
+ }
+
+ public synchronized HashSet<Guid> getDomainProblematicVdsList(Guid
domainId) {
+ HashSet<Guid> vdsList = new HashSet<>();
+ Map<Guid, Status> domainVdsStatus = domainsVisibility.get(domainId);
+
+ for (Entry<Guid, Status> status : domainVdsStatus.entrySet()) {
+ if (status.getValue().isProblematic())
+ vdsList.add(status.getKey());
+ }
+
+ return vdsList;
+ }
+}
diff --git
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
index 10380d9..9957bb8 100644
---
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
+++
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
@@ -4,11 +4,9 @@
import java.net.SocketException;
import java.text.MessageFormat;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@@ -90,16 +88,12 @@
/**
* process received domain monitoring information from a given vds if
necessary (according to it's status).
* @param vds
- * @param storagePoolId
* @param vdsDomainData
*/
- public static void updateVdsDomainsData(VDS vds, Guid storagePoolId,
- ArrayList<VDSDomainsData> vdsDomainData) {
- if (vds.getStatus() == VDSStatus.Up) {
- IrsProxyData proxy = _irsProxyData.get(storagePoolId);
- if (proxy != null) {
- proxy.updateVdsDomainsData(vds.getId(), vds.getName(),
vdsDomainData);
- }
+ public static void updateVdsDomainsVisibility(VDS vds,
ArrayList<VDSDomainsData> vdsDomainData) {
+ IrsProxyData proxy = _irsProxyData.get(vds.getStoragePoolId());
+ if (proxy != null) {
+ proxy.updateDomainsVisibility(vds, vdsDomainData);
}
}
@@ -1001,111 +995,64 @@
mCurrentVdsId = null;
}
- private final Map<Guid, HashSet<Guid>> _domainsInProblem = new
ConcurrentHashMap<Guid, HashSet<Guid>>();
- private final Map<Guid, String> _timers = new HashMap<Guid, String>();
+ private final Map<Guid, String> _timers = new HashMap<>();
+ private final DomainVisibility domainsVisibility = new
DomainVisibility();
- public void updateVdsDomainsData(final Guid vdsId, final String
vdsName,
- final ArrayList<VDSDomainsData> data) {
+ private static Map<Guid, VDSDomainsData>
getDataDomainsMap(ArrayList<VDSDomainsData> domainsDataList) {
+ Map<Guid, VDSDomainsData> dataDomainMap = new HashMap<>();
+ for (VDSDomainsData domainsData : domainsDataList) {
+ dataDomainMap.put(domainsData.getDomainId(), domainsData);
+ }
+ return dataDomainMap;
+ }
- Set<Guid> domainsInProblems = null;
- StoragePool storagePool =
-
DbFacade.getInstance().getStoragePoolDao().get(_storagePoolId);
- if (storagePool != null
- && (storagePool.getStatus() == StoragePoolStatus.Up ||
storagePool.getStatus() == StoragePoolStatus.NonResponsive)) {
+ private static Map<Guid, StorageDomain> getPoolDomainsMap(Guid
storagePoolId) {
+ Map<Guid, StorageDomain> poolDomainMap = new HashMap<>();
+ for (StorageDomain poolDomain :
+
DbFacade.getInstance().getStorageDomainDao().getAllForStoragePool(storagePoolId))
{
+ poolDomainMap.put(poolDomain.getId(), poolDomain);
+ }
+ return poolDomainMap;
+ }
- try {
- // build a list of all domains in pool
- // which are in status Active or Unknown
- Set<Guid> domainsInPool = new HashSet<Guid>(
-
DbFacade.getInstance().getStorageDomainStaticDao().getAllIds(
- _storagePoolId,
StorageDomainStatus.Active));
-
domainsInPool.addAll(DbFacade.getInstance().getStorageDomainStaticDao().getAllIds(
- _storagePoolId, StorageDomainStatus.Unknown));
- Set<Guid> inActiveDomainsInPool =
- new HashSet<Guid>(DbFacade.getInstance()
- .getStorageDomainStaticDao()
- .getAllIds(_storagePoolId,
StorageDomainStatus.InActive));
+ public void updateDomainsVisibility(VDS vds, ArrayList<VDSDomainsData>
domainsDataList) {
+ // Step 1: return if there's no data to analyze and remove the vds
if it is in Maintenance.
+ // It's not correct to remove the NonOperational hosts because
they may be still connected
+ // to the pool. They must be moved to Maintenance.
+ if (domainsDataList == null) {
+ if (vds.getStatus() == VDSStatus.Maintenance) {
+ domainsVisibility.removeVdsStatuses(vds.getId());
+ }
+ return;
+ }
- // build a list of all the domains in
- // pool (domainsInPool) that are not
- // visible by the host.
- List<Guid> domainsInPoolThatNonVisibleByVds = new
ArrayList<Guid>();
- Set<Guid> dataDomainIds = new HashSet<Guid>();
- for (VDSDomainsData tempData : data) {
- dataDomainIds.add(tempData.getDomainId());
- }
- for (Guid tempDomainId : domainsInPool) {
- if (!dataDomainIds.contains(tempDomainId)) {
- domainsInPoolThatNonVisibleByVds.add(tempDomainId);
- }
- }
+ // Step 2: build the domain maps for the reported domains and the
pool domains
+ Map<Guid, VDSDomainsData> dataDomainMap =
getDataDomainsMap(domainsDataList);
+ Map<Guid, StorageDomain> poolDomainMap =
getPoolDomainsMap(_storagePoolId);
- // build a list of domains that the host
- // reports as in problem (code!=0) or (code==0
- // && lastChecl >
- // ConfigValues.MaxStorageVdsTimeoutCheckSec)
- // and are contained in the Active or
- // Unknown domains in pool
- List<Guid> domainsSeenByVdsInProblem = new
ArrayList<Guid>();
- for (VDSDomainsData tempData : data) {
- if (domainsInPool.contains(tempData.getDomainId())) {
- if (isDomainReportedAsProblematic(tempData,
false)) {
-
domainsSeenByVdsInProblem.add(tempData.getDomainId());
- } else if (tempData.getDelay() > Config.<Double>
getValue(ConfigValues.MaxStorageVdsDelayCheckSec)) {
- logDelayedDomain(vdsId, tempData);
- }
- } else if
(inActiveDomainsInPool.contains(tempData.getDomainId())
- && !isDomainReportedAsProblematic(tempData,
false)) {
- log.warnFormat("Storage Domain {0} was reported by
Host {1} as Active in Pool {2}, moving to active status",
- getDomainIdTuple(tempData.getDomainId()),
- vdsName,
- _storagePoolId);
- StoragePoolIsoMap map =
- DbFacade.getInstance()
- .getStoragePoolIsoMapDao()
- .get(new
StoragePoolIsoMapId(tempData.getDomainId(), _storagePoolId));
- map.setStatus(StorageDomainStatus.Active);
-
DbFacade.getInstance().getStoragePoolIsoMapDao().update(map);
- }
- }
+ domainsVisibility.updatePoolDomains(poolDomainMap.keySet());
- // build a list of all potential domains
- // in problem
- domainsInProblems = new HashSet<Guid>();
- domainsInProblems.addAll(domainsInPoolThatNonVisibleByVds);
- domainsInProblems.addAll(domainsSeenByVdsInProblem);
+ for (StorageDomain poolDomain : poolDomainMap.values()) {
+ DomainVisibility.Status domainStatus;
+ VDSDomainsData domainData =
dataDomainMap.get(poolDomain.getId());
- } catch (RuntimeException ex) {
- log.error("error in updateVdsDomainsData", ex);
+ if (domainData == null) {
+ domainStatus = poolDomain.getStatus() ==
StorageDomainStatus.Active ?
+ DomainVisibility.Status.UNREPORTED :
DomainVisibility.Status.INACTIVE;
+ } else {
+ domainStatus = isDomainReportedAsProblematic(domainData,
false) ?
+ DomainVisibility.Status.UNREACHABLE :
DomainVisibility.Status.ACTIVE;
}
- }
- updateDomainInProblem(vdsId, vdsName, domainsInProblems);
- }
+ domainsVisibility.setVdsDomainStatus(vds.getId(),
poolDomain.getId(), domainStatus);
- private void updateDomainInProblem(final Guid vdsId, final String
vdsName, final Set<Guid> domainsInProblems) {
- if (domainsInProblems != null) {
- ((EventQueue) EjbUtils.findBean(BeanType.EVENTQUEUE_MANAGER,
BeanProxyType.LOCAL)).submitEventSync(new Event(_storagePoolId,
- null, vdsId, EventType.DOMAINMONITORING, ""),
- new Callable<EventResult>() {
- @Override
- public EventResult call() {
- EventResult result = new EventResult(true,
EventType.DOMAINMONITORING);
- updateProblematicVdsData(vdsId, vdsName,
domainsInProblems);
- return result;
- }
- });
+ if (domainStatus == DomainVisibility.Status.UNREPORTED
+ || domainStatus ==
DomainVisibility.Status.UNREACHABLE) {
+ setTimerIfAbsent(poolDomain.getId());
+ } else if
(domainsVisibility.getDomainProblematicVdsList(poolDomain.getId()).size() == 0)
{
+ clearTimer(poolDomain.getId());
+ }
}
- }
-
- private void logDelayedDomain(final Guid vdsId, VDSDomainsData
tempData) {
- AuditLogableBase logable = new AuditLogableBase();
- logable.setVdsId(vdsId);
- logable.setStorageDomainId(tempData.getDomainId());
- logable.addCustomValue("Delay",
- Double.toString(tempData.getDelay()));
- AuditLogDirector.log(logable,
- AuditLogType.VDS_DOMAIN_DELAY_INTERVAL);
}
private List<Guid>
obtainDomainsReportedAsProblematic(List<VDSDomainsData> vdsDomainsData) {
@@ -1155,55 +1102,6 @@
return false;
}
- private void updateProblematicVdsData(final Guid vdsId, final String
vdsName, Set<Guid> domainsInProblems) {
- // for all problematic domains
- // update cache of _domainsInProblem
- // and _vdssInProblem and add a new
- // timer for new domains in problem
- Set<Guid> domainsInProblemKeySet = _domainsInProblem.keySet();
- for (Guid domainId : domainsInProblems) {
- if (domainsInProblemKeySet.contains(domainId)) {
- // existing domains in problem
- updateDomainInProblemData(domainId, vdsId, vdsName);
- } else {
- // new domains in problems
- addDomainInProblemData(domainId, vdsId, vdsName);
- }
- }
- Set<Guid> notReportedDomainsByHost = new
HashSet<Guid>(_domainsInProblem.keySet());
- notReportedDomainsByHost.removeAll(domainsInProblems);
- for (Guid domainId : notReportedDomainsByHost) {
- Set<Guid> vdsForDomain = _domainsInProblem.get(domainId);
- if (vdsForDomain != null && vdsForDomain.contains(vdsId)) {
- domainRecoveredFromProblem(domainId, vdsId, vdsName);
- }
- }
- }
-
- private void domainRecoveredFromProblem(Guid domainId, Guid vdsId,
String vdsName) {
- String domainIdTuple = getDomainIdTuple(domainId);
- log.infoFormat("Domain {0} recovered from problem. vds: {1}",
domainIdTuple, vdsName);
- _domainsInProblem.get(domainId).remove(vdsId);
- if (_domainsInProblem.get(domainId).size() == 0) {
- log.infoFormat("Domain {0} has recovered from problem. No
active host in the DC is reporting it as" +
- " problematic, so clearing the domain recovery
timer.", domainIdTuple);
- _domainsInProblem.remove(domainId);
- clearTimer(domainId);
- }
- }
-
- private void addDomainInProblemData(Guid domainId, Guid vdsId, String
vdsName) {
- _domainsInProblem.put(domainId, new
java.util.HashSet<Guid>(java.util.Arrays.asList(vdsId)));
- log.warnFormat("domain {0} in problem. vds: {1}",
getDomainIdTuple(domainId), vdsName);
- Class[] inputType = new Class[] { Guid.class };
- Object[] inputParams = new Object[] { domainId };
- String jobId =
SchedulerUtilQuartzImpl.getInstance().scheduleAOneTimeJob(this, "onTimer",
inputType,
- inputParams, Config.<Integer>
getValue(ConfigValues.StorageDomainFalureTimeoutInMinutes),
- TimeUnit.MINUTES);
- clearTimer(domainId);
- _timers.put(domainId, jobId);
- }
-
@OnTimerMethodAnnotation("onTimer")
public void onTimer(final Guid domainId) {
((EventQueue) EjbUtils.findBean(BeanType.EVENTQUEUE_MANAGER,
BeanProxyType.LOCAL)).submitEventAsync(new Event(_storagePoolId,
@@ -1212,7 +1110,7 @@
@Override
public EventResult call() {
EventResult result = null;
- if (_domainsInProblem.containsKey(domainId)) {
+ if
(domainsVisibility.getDomainProblematicVdsList(domainId).size() > 0) {
log.info("starting processDomainRecovery for
domain " + getDomainIdTuple(domainId));
result = processDomainRecovery(domainId);
}
@@ -1220,11 +1118,6 @@
return result;
}
});
- }
-
- private void updateDomainInProblemData(Guid domainId, Guid vdsId,
String vdsName) {
- log.debugFormat("domain {0} still in problem. vds: {1}",
getDomainIdTuple(domainId), vdsName);
- _domainsInProblem.get(domainId).add(vdsId);
}
private EventResult processDomainRecovery(final Guid domainId) {
@@ -1245,7 +1138,7 @@
// on this domain as in problem.
// Mark the above list as hosts we suspect are in
// problem.
- Set<Guid> hostsThatReportedDomainAsInProblem =
_domainsInProblem.get(domainId);
+ Set<Guid> hostsThatReportedDomainAsInProblem =
domainsVisibility.getDomainProblematicVdsList(domainId);
List<Guid> vdssInProblem = new ArrayList<Guid>();
for (Guid tempVDSId : vdssInPool) {
if (!hostsThatReportedDomainAsInProblem.contains(tempVDSId)) {
@@ -1269,7 +1162,7 @@
// Moving all the hosts which reported on
// this domain as in problem to non
// operational.
- for (final Guid vdsId : _domainsInProblem.get(domainId)) {
+ for (final Guid vdsId :
domainsVisibility.getDomainProblematicVdsList(domainId)) {
VDS vds = vdsMap.get(vdsId);
if (vds == null) {
log.warnFormat(
@@ -1328,9 +1221,23 @@
}
}
- // clear from cache of _domainsInProblem
- clearDomainFromCache(domainId, nonOpVdss);
return result;
+ }
+
+ private void setTimerIfAbsent(Guid domainId) {
+ synchronized (_timers) {
+ if (_timers.containsKey(domainId))
+ return;
+
+ String jobId =
SchedulerUtilQuartzImpl.getInstance().scheduleAOneTimeJob(
+ this, "onTimer",
+ new Class[] { Guid.class },
+ new Object[] { domainId },
+ Config.<Integer>
getValue(ConfigValues.StorageDomainFalureTimeoutInMinutes),
+ TimeUnit.MINUTES);
+
+ _timers.put(domainId, jobId);
+ }
}
/**
@@ -1347,43 +1254,13 @@
}
/**
- * clears the problematic domain from the vdss that reported on this
domain as problematic and from the domains
- * in problem
- *
- * @param domainId
- * - the domain to clear cache for.
- * @param nonOpVdss - passed vdss that non operational
- */
- private void clearDomainFromCache(Guid domainId, List<Guid> nonOpVdss)
{
- if (domainId != null) {
- _domainsInProblem.remove(domainId);
- }
- removeVdsAsProblematic(nonOpVdss);
- }
-
- private void removeVdsAsProblematic(List<Guid> nonOpVdss) {
- Iterator<Map.Entry<Guid, HashSet<Guid>>> iterDomainsInProblem =
_domainsInProblem.entrySet().iterator();
- while (iterDomainsInProblem.hasNext()) {
- Map.Entry<Guid, HashSet<Guid>> entry =
iterDomainsInProblem.next();
- entry.getValue().removeAll(nonOpVdss);
- if (entry.getValue().isEmpty()) {
- iterDomainsInProblem.remove();
- clearTimer(entry.getKey());
- log.infoFormat("Domain {0} has recovered from problem. No
active host in the DC is reporting it as poblematic, so clearing the domain
recovery timer.",
- getDomainIdTuple(entry.getKey()));
- }
-
- }
- }
-
- /**
* deletes all the jobs for the domains in the pool and clears the
problematic entities caches.
*/
public void clearCache() {
log.info("clearing cache for problematic entities in pool " +
_storagePoolId);
// clear lists
_timers.clear();
- _domainsInProblem.clear();
+ domainsVisibility.clear();
}
public void clearPoolTimers() {
@@ -1410,7 +1287,7 @@
log.infoFormat("Clearing cache of pool: {0} for problematic
entities of VDS: {1}.",
_storagePoolId, vdsName);
- clearDomainFromCache(null, Arrays.asList(vdsId));
+ domainsVisibility.removeVdsStatuses(vdsId);
}
private boolean _disposed;
--
To view, visit http://gerrit.ovirt.org/23370
To unsubscribe, visit http://gerrit.ovirt.org/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Id74f12d108f75924fbff28409a89bc1caff05908
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-engine
Gerrit-Branch: master
Gerrit-Owner: Federico Simoncelli <[email protected]>
_______________________________________________
Engine-patches mailing list
[email protected]
http://lists.ovirt.org/mailman/listinfo/engine-patches