Copilot commented on code in PR #11789:
URL: https://github.com/apache/cloudstack/pull/11789#discussion_r2413860712
##########
server/src/main/java/com/cloud/storage/StoragePoolAutomationImpl.java:
##########
@@ -106,235 +72,281 @@ public class StoragePoolAutomationImpl implements
StoragePoolAutomation {
@Inject
ManagementServer server;
@Inject
- DataStoreProviderManager providerMgr;
- @Inject
StorageManager storageManager;
@Override
public boolean maintain(DataStore store) {
- Long userId = CallContext.current().getCallingUserId();
- User user = _userDao.findById(userId);
- Account account = CallContext.current().getCallingAccount();
StoragePoolVO pool = primaryDataStoreDao.findById(store.getId());
try {
- List<StoragePoolVO> spes = null;
- // Handling Zone and Cluster wide storage scopes.
- // if the storage is ZONE wide then we pass podid and cluster id
as null as they will be empty for ZWPS
- if (pool.getScope() == ScopeType.ZONE) {
- spes = primaryDataStoreDao.listBy(pool.getDataCenterId(),
null, null, ScopeType.ZONE);
- } else {
- spes = primaryDataStoreDao.listBy(pool.getDataCenterId(),
pool.getPodId(), pool.getClusterId(), ScopeType.CLUSTER);
- }
- for (StoragePoolVO sp : spes) {
- if (sp.getParent() != pool.getParent() && sp.getId() !=
pool.getParent()) { // If Datastore cluster is tried to prepare for maintenance
then child storage pools are also kept in PrepareForMaintenance mode
- if (sp.getStatus() ==
StoragePoolStatus.PrepareForMaintenance) {
- throw new CloudRuntimeException(String.format("Only
one storage pool in a cluster can be in PrepareForMaintenance mode, %s is
already in PrepareForMaintenance mode ", sp));
- }
- }
- }
- StoragePool storagePool = (StoragePool)store;
-
- //Handeling the Zone wide and cluster wide primay storage
- List<HostVO> hosts = new ArrayList<HostVO>();
- // if the storage scope is ZONE wide, then get all the hosts for
which hypervisor ZWSP created to send Modifystoragepoolcommand
- //TODO: if it's zone wide, this code will list a lot of hosts in
the zone, which may cause performance/OOM issue.
- if (pool.getScope().equals(ScopeType.ZONE)) {
- if (HypervisorType.Any.equals(pool.getHypervisor())) {
- hosts =
_resourceMgr.listAllUpAndEnabledHostsInOneZone(pool.getDataCenterId());
- }
- else {
- hosts =
_resourceMgr.listAllUpAndEnabledHostsInOneZoneByHypervisor(pool.getHypervisor(),
pool.getDataCenterId());
- }
- } else {
- hosts =
_resourceMgr.listHostsInClusterByStatus(pool.getClusterId(), Status.Up);
+ getStoragePoolForSpecification(pool);
+
+ List<HostVO> hosts = getHostsForStoragePool(pool);
+
+ if (setNextStateForMaintenance(hosts, pool) ==
StoragePoolStatus.PrepareForMaintenance) {
+ removeHeartbeatForHostsFromPool(hosts, pool);
+ // check to see if other ps exist
+ // if they do, then we can migrate over the system vms to them
+ // if they don't, then just stop all vms on this one
+ List<StoragePoolVO> upPools =
primaryDataStoreDao.listByStatusInZone(pool.getDataCenterId(),
StoragePoolStatus.Up);
+ boolean restart = !CollectionUtils.isEmpty(upPools);
+
+ // 2. Get a list of all the ROOT volumes within this storage
pool
+ List<VolumeVO> allVolumes =
volumeDao.findByPoolId(pool.getId());
+ // 3. Enqueue to the work queue
+ enqueueMigrationsForVolumes(allVolumes, pool);
+ // 4. Process the queue
+ processMigrationWorkloads(pool, restart);
}
+ } catch (Exception e) {
+ logger.error("Exception in enabling primary storage maintenance:",
e);
+ pool.setStatus(StoragePoolStatus.ErrorInMaintenance);
+ primaryDataStoreDao.update(pool.getId(), pool);
+ // TODO decide on what recovery is possible
+ throw new CloudRuntimeException(e.getMessage());
+ }
+ return true;
+ }
- if (hosts == null || hosts.size() == 0) {
- pool.setStatus(StoragePoolStatus.Maintenance);
- primaryDataStoreDao.update(pool.getId(), pool);
- return true;
- } else {
- // set the pool state to prepare for maintenance
- pool.setStatus(StoragePoolStatus.PrepareForMaintenance);
- primaryDataStoreDao.update(pool.getId(), pool);
- }
- // remove heartbeat
- for (HostVO host : hosts) {
- ModifyStoragePoolCommand cmd = new
ModifyStoragePoolCommand(false, storagePool);
- final Answer answer = agentMgr.easySend(host.getId(), cmd);
- if (answer == null || !answer.getResult()) {
- if (logger.isDebugEnabled()) {
- logger.debug("ModifyStoragePool false failed due to "
+ ((answer == null) ? "answer null" : answer.getDetails()));
- }
- } else {
- if (logger.isDebugEnabled()) {
- logger.debug("ModifyStoragePool false succeeded");
- }
- if (pool.getPoolType() ==
Storage.StoragePoolType.DatastoreCluster) {
- logger.debug("Started synchronising datastore cluster
storage pool {} with vCenter", pool);
-
storageManager.syncDatastoreClusterStoragePool(pool.getId(),
((ModifyStoragePoolAnswer) answer).getDatastoreClusterChildren(), host.getId());
- }
- }
- }
- // check to see if other ps exist
- // if they do, then we can migrate over the system vms to them
- // if they don't, then just stop all vms on this one
- List<StoragePoolVO> upPools =
primaryDataStoreDao.listByStatusInZone(pool.getDataCenterId(),
StoragePoolStatus.Up);
- boolean restart = true;
- if (upPools == null || upPools.size() == 0) {
- restart = false;
- }
+ @Override
+ public boolean cancelMaintain(DataStore store) {
+ // Change the storage state back to up
+ StoragePoolVO poolVO = primaryDataStoreDao.findById(store.getId());
+ StoragePool pool = (StoragePool)store;
- // 2. Get a list of all the ROOT volumes within this storage pool
- List<VolumeVO> allVolumes = volumeDao.findByPoolId(pool.getId());
+ List<HostVO> hosts = getHostsForStoragePool(poolVO);
- // 3. Enqueue to the work queue
- for (VolumeVO volume : allVolumes) {
- VMInstanceVO vmInstance =
vmDao.findById(volume.getInstanceId());
+ if (CollectionUtils.isEmpty(hosts)) {
+ return true;
+ }
- if (vmInstance == null) {
- continue;
- }
+ Pair<Map<String, String>, Boolean> nfsMountOpts =
storageManager.getStoragePoolNFSMountOpts(pool, null);
+ addHeartbeatToHostsInPool(hosts, pool, nfsMountOpts);
- // enqueue sp work
- if (vmInstance.getState().equals(State.Running) ||
vmInstance.getState().equals(State.Starting) ||
vmInstance.getState().equals(State.Stopping)) {
-
- try {
- StoragePoolWorkVO work = new
StoragePoolWorkVO(vmInstance.getId(), pool.getId(), false, false,
server.getId());
- _storagePoolWorkDao.persist(work);
- } catch (Exception e) {
- if (logger.isDebugEnabled()) {
- logger.debug("Work record already exists, re-using
by re-setting values");
- }
- StoragePoolWorkVO work =
_storagePoolWorkDao.findByPoolIdAndVmId(pool.getId(), vmInstance.getId());
- work.setStartedAfterMaintenance(false);
- work.setStoppedForMaintenance(false);
- work.setManagementServerId(server.getId());
- _storagePoolWorkDao.update(work.getId(), work);
- }
- }
+ // 2. Get a list of pending work for this queue
+ List<StoragePoolWorkVO> pendingWork =
_storagePoolWorkDao.listPendingWorkForCancelMaintenanceByPoolId(poolVO.getId());
+
+ // 3. work through the queue
+ cancelMigrationWorkloads(pendingWork);
+ return false;
+ }
+
+ private StoragePoolStatus setNextStateForMaintenance(List<HostVO> hosts,
StoragePoolVO pool) {
+ if (CollectionUtils.isEmpty(hosts)) {
+ pool.setStatus(StoragePoolStatus.Maintenance);
+ primaryDataStoreDao.update(pool.getId(), pool);
+ return StoragePoolStatus.Maintenance;
+ } else {
+ // set the pool state to prepare for maintenance
+ pool.setStatus(StoragePoolStatus.PrepareForMaintenance);
+ primaryDataStoreDao.update(pool.getId(), pool);
+ return StoragePoolStatus.PrepareForMaintenance;
+ }
+ }
+
+ private void processMigrationWorkloads(StoragePoolVO pool, boolean
restart) throws ResourceUnavailableException, OperationTimedoutException,
InsufficientCapacityException {
+ List<StoragePoolWorkVO> pendingWork =
_storagePoolWorkDao.listPendingWorkForPrepareForMaintenanceByPoolId(pool.getId());
+
+ for (StoragePoolWorkVO work : pendingWork) {
+ // shut down the running vms
+ VMInstanceVO vmInstance = vmDao.findById(work.getVmId());
+
+ if (vmInstance == null) {
+ continue;
}
- // 4. Process the queue
- List<StoragePoolWorkVO> pendingWork =
_storagePoolWorkDao.listPendingWorkForPrepareForMaintenanceByPoolId(pool.getId());
+ switch (vmInstance.getType()) {
+ case ConsoleProxy:
+ case SecondaryStorageVm:
+ case DomainRouter:
+ handleVmMigration(restart, work, vmInstance);
+ break;
+ case User:
+ handleStopVmForMigration(work, vmInstance);
+ break;
+ }
+ }
+ }
- for (StoragePoolWorkVO work : pendingWork) {
- // shut down the running vms
+ private void cancelMigrationWorkloads(List<StoragePoolWorkVO> pendingWork)
{
+ for (StoragePoolWorkVO work : pendingWork) {
+ try {
VMInstanceVO vmInstance = vmDao.findById(work.getVmId());
if (vmInstance == null) {
continue;
}
- // if the instance is of type consoleproxy, call the console
- // proxy
- if
(vmInstance.getType().equals(VirtualMachine.Type.ConsoleProxy)) {
- // call the consoleproxymanager
- ConsoleProxyVO consoleProxy =
_consoleProxyDao.findById(vmInstance.getId());
- vmMgr.advanceStop(consoleProxy.getUuid(), false);
- // update work status
- work.setStoppedForMaintenance(true);
- _storagePoolWorkDao.update(work.getId(), work);
+ switch (vmInstance.getType()) {
+ case ConsoleProxy:
+ case SecondaryStorageVm:
+ case DomainRouter:
+ handleVmStart(work, vmInstance);
+ break;
+ case User:
+ handleUserVmStart(work, vmInstance);
+ break;
+ }
+ } catch (Exception e) {
+ logger.debug("Failed start vm", e);
+ throw new CloudRuntimeException(e.toString());
+ }
+ }
+ }
- if (restart) {
+ private void handleStopVmForMigration(StoragePoolWorkVO work, VMInstanceVO
vmInstance) throws ResourceUnavailableException, OperationTimedoutException {
+ vmMgr.advanceStop(vmInstance.getUuid(), false);
+ // update work status
+ work.setStoppedForMaintenance(true);
+ _storagePoolWorkDao.update(work.getId(), work);
+ }
- vmMgr.advanceStart(consoleProxy.getUuid(), null, null);
- // update work status
- work.setStartedAfterMaintenance(true);
- _storagePoolWorkDao.update(work.getId(), work);
- }
- }
+ private void handleVmMigration(boolean restart, StoragePoolWorkVO work,
VMInstanceVO vmInstance) throws ResourceUnavailableException,
OperationTimedoutException, InsufficientCapacityException {
+ handleStopVmForMigration(work, vmInstance);
- // if the instance is of type uservm, call the user vm manager
- if (vmInstance.getType() == VirtualMachine.Type.User) {
- UserVmVO userVm = userVmDao.findById(vmInstance.getId());
- vmMgr.advanceStop(userVm.getUuid(), false);
- // update work status
- work.setStoppedForMaintenance(true);
- _storagePoolWorkDao.update(work.getId(), work);
- }
+ if (restart) {
+ handleVmStart(work, vmInstance);
+ }
+ }
+ private void handleVmStart(StoragePoolWorkVO work, VMInstanceVO
vmInstance) throws InsufficientCapacityException, ResourceUnavailableException,
OperationTimedoutException {
+ vmMgr.advanceStart(vmInstance.getUuid(), null, null);
+ // update work queue
+ work.setStartedAfterMaintenance(true);
+ _storagePoolWorkDao.update(work.getId(), work);
+ }
- // if the instance is of type secondary storage vm, call the
- // secondary storage vm manager
- if
(vmInstance.getType().equals(VirtualMachine.Type.SecondaryStorageVm)) {
- SecondaryStorageVmVO secStrgVm =
_secStrgDao.findById(vmInstance.getId());
- vmMgr.advanceStop(secStrgVm.getUuid(), false);
- // update work status
- work.setStoppedForMaintenance(true);
- _storagePoolWorkDao.update(work.getId(), work);
+ private void enqueueMigrationsForVolumes(List<VolumeVO> allVolumes,
StoragePoolVO pool) {
+ for (VolumeVO volume : allVolumes) {
+ VMInstanceVO vmInstance = vmDao.findById(volume.getInstanceId());
- if (restart) {
- vmMgr.advanceStart(secStrgVm.getUuid(), null, null);
- // update work status
- work.setStartedAfterMaintenance(true);
- _storagePoolWorkDao.update(work.getId(), work);
- }
- }
+ if (vmInstance == null) {
+ continue;
+ }
- // if the instance is of type domain router vm, call the
network
- // manager
- if
(vmInstance.getType().equals(VirtualMachine.Type.DomainRouter)) {
- DomainRouterVO domR =
_domrDao.findById(vmInstance.getId());
- vmMgr.advanceStop(domR.getUuid(), false);
- // update work status
- work.setStoppedForMaintenance(true);
- _storagePoolWorkDao.update(work.getId(), work);
+ // enqueue sp work
+ if (vmInstance.getState().equals(State.Running) ||
vmInstance.getState().equals(State.Starting) ||
vmInstance.getState().equals(State.Stopping)) {
- if (restart) {
- vmMgr.advanceStart(domR.getUuid(), null, null);
- // update work status
- work.setStartedAfterMaintenance(true);
- _storagePoolWorkDao.update(work.getId(), work);
+ try {
+ StoragePoolWorkVO work = new
StoragePoolWorkVO(vmInstance.getId(), pool.getId(), false, false,
server.getId());
+ _storagePoolWorkDao.persist(work);
+ } catch (Exception e) {
+ if (logger.isDebugEnabled()) {
+ logger.debug("Work record already exists, re-using by
re-setting values");
}
+ StoragePoolWorkVO work =
_storagePoolWorkDao.findByPoolIdAndVmId(pool.getId(), vmInstance.getId());
+ work.setStartedAfterMaintenance(false);
+ work.setStoppedForMaintenance(false);
+ work.setManagementServerId(server.getId());
+ _storagePoolWorkDao.update(work.getId(), work);
}
}
- } catch (Exception e) {
- logger.error("Exception in enabling primary storage maintenance:",
e);
- pool.setStatus(StoragePoolStatus.ErrorInMaintenance);
- primaryDataStoreDao.update(pool.getId(), pool);
- throw new CloudRuntimeException(e.getMessage());
}
- return true;
}
- @Override
- public boolean cancelMaintain(DataStore store) {
- // Change the storage state back to up
- Long userId = CallContext.current().getCallingUserId();
- User user = _userDao.findById(userId);
- Account account = CallContext.current().getCallingAccount();
- StoragePoolVO poolVO = primaryDataStoreDao.findById(store.getId());
- StoragePool pool = (StoragePool)store;
+ private void removeHeartbeatForHostsFromPool(List<HostVO> hosts,
StoragePool storagePool) {
+ // remove heartbeat
+ for (HostVO host : hosts) {
+ ModifyStoragePoolCommand cmd = new ModifyStoragePoolCommand(false,
storagePool);
+ final Answer answer = agentMgr.easySend(host.getId(), cmd);
+ if (answer == null || !answer.getResult()) {
+ if (logger.isDebugEnabled()) {
+ logger.debug("ModifyStoragePool false failed due to {}",
((answer == null) ? "answer null" : answer.getDetails()));
+ }
+ } else {
+ reportSucceededModifyStorePool(storagePool,
(ModifyStoragePoolAnswer) answer, host, false);
+ }
+ }
+ }
- //Handeling the Zone wide and cluster wide primay storage
- List<HostVO> hosts = new ArrayList<HostVO>();
- // if the storage scope is ZONE wide, then get all the hosts for which
hypervisor ZWSP created to send Modifystoragepoolcommand
- if (poolVO.getScope().equals(ScopeType.ZONE)) {
+ private void reportSucceededModifyStorePool(StoragePool storagePool,
ModifyStoragePoolAnswer answer, HostVO host, boolean add) {
+ if (logger.isDebugEnabled()) {
+ logger.debug("ModifyStoragePool succeeded for {}", add ? "adding"
: "removing");
+ }
+ if (storagePool.getPoolType() ==
Storage.StoragePoolType.DatastoreCluster) {
+ logger.debug("Started synchronising datastore cluster storage pool
{} with vCenter", storagePool);
+
storageManager.syncDatastoreClusterStoragePool(storagePool.getId(),
answer.getDatastoreClusterChildren(), host.getId());
+ }
+ }
+
+ /**
+ * Handling the Zone wide and cluster wide primary storage
+ * if the storage scope is ZONE wide, then get all the hosts for which
hypervisor ZoneWideStoragePools created to send ModifyStoragePoolCommand
+ * TODO: if it's zone wide, this code will list a lot of hosts in the
zone, which may cause performance/OOM issue.
+ * @param pool pool to check for connected hosts
+ * @return a list of connected hosts
+ */
+ private List<HostVO> getHostsForStoragePool(StoragePoolVO pool) {
+ List<HostVO> hosts;
+ if (pool.getScope().equals(ScopeType.ZONE)) {
if (HypervisorType.Any.equals(pool.getHypervisor())) {
hosts =
_resourceMgr.listAllUpAndEnabledHostsInOneZone(pool.getDataCenterId());
}
else {
- hosts =
_resourceMgr.listAllUpAndEnabledHostsInOneZoneByHypervisor(poolVO.getHypervisor(),
pool.getDataCenterId());
+ hosts =
_resourceMgr.listAllUpAndEnabledHostsInOneZoneByHypervisor(pool.getHypervisor(),
pool.getDataCenterId());
}
} else {
hosts =
_resourceMgr.listHostsInClusterByStatus(pool.getClusterId(), Status.Up);
}
+ return hosts;
+ }
- if (hosts == null || hosts.size() == 0) {
- return true;
+ /**
+ * Handling Zone and Cluster wide storage scopes. Depending on the scope
of the pool, check for other storage pools in the same scope
+ * If the storage is ZONE wide then we pass podId and cluster id as null
as they will be empty for Zone wide storage
+ *
+ * @param pool pool to check for other pools in the same scope
+ */
+ private void getStoragePoolForSpecification(StoragePoolVO pool) {
+ List<StoragePoolVO> storagePools;
+ if (pool.getScope() == ScopeType.ZONE) {
+ storagePools = primaryDataStoreDao.listBy(pool.getDataCenterId(),
null, null, ScopeType.ZONE);
+ } else {
+ storagePools = primaryDataStoreDao.listBy(pool.getDataCenterId(),
pool.getPodId(), pool.getClusterId(), ScopeType.CLUSTER);
}
+ checkHierarchyForPreparingForMaintenance(pool, storagePools);
+ }
- Pair<Map<String, String>, Boolean> nfsMountOpts =
storageManager.getStoragePoolNFSMountOpts(pool, null);
- // add heartbeat
+ /**
+ * If Datastore cluster is tried to prepare for maintenance then child
storage pools are also kept in PrepareForMaintenance mode
+ * @param pool target to put in maintenance
+ * @param storagePools list of possible peers/parents/children
+ */
+ private static void checkHierarchyForPreparingForMaintenance(StoragePoolVO
pool, List<StoragePoolVO> storagePools) {
+ for (StoragePoolVO storagePool : storagePools) {
+ if (!(storagePool.getParent().equals(pool.getParent()) ||
!pool.getParent().equals(storagePool.getId())) &&
+ (storagePool.getStatus() ==
StoragePoolStatus.PrepareForMaintenance)) {
+ throw new CloudRuntimeException(String.format("Only one
storage pool in a cluster can be in PrepareForMaintenance mode, %s is already
in PrepareForMaintenance mode ", storagePool));
+ }
+ }
+ }
+
+ /**
+ * // check if the vm has a root volume. If not, remove the item
from the queue, the vm should be
+ * // started only when it has at least one root volume attached
to it
+ * // don't allow to start vm that doesn't have a root volume
Review Comment:
The JavaDoc comment is malformed. It should use proper JavaDoc format
instead of starting each line with `* //`. These appear to be regular comments
that should either be removed or converted to proper JavaDoc.
```suggestion
* Checks if the VM has a root volume. If not, removes the item from the
queue.
* The VM should be started only when it has at least one root volume
attached to it.
* Do not allow starting a VM that does not have a root volume.
```
##########
server/src/main/java/com/cloud/storage/StoragePoolAutomationImpl.java:
##########
@@ -106,235 +72,281 @@ public class StoragePoolAutomationImpl implements
StoragePoolAutomation {
@Inject
ManagementServer server;
@Inject
- DataStoreProviderManager providerMgr;
- @Inject
StorageManager storageManager;
@Override
public boolean maintain(DataStore store) {
- Long userId = CallContext.current().getCallingUserId();
- User user = _userDao.findById(userId);
- Account account = CallContext.current().getCallingAccount();
StoragePoolVO pool = primaryDataStoreDao.findById(store.getId());
try {
- List<StoragePoolVO> spes = null;
- // Handling Zone and Cluster wide storage scopes.
- // if the storage is ZONE wide then we pass podid and cluster id
as null as they will be empty for ZWPS
- if (pool.getScope() == ScopeType.ZONE) {
- spes = primaryDataStoreDao.listBy(pool.getDataCenterId(),
null, null, ScopeType.ZONE);
- } else {
- spes = primaryDataStoreDao.listBy(pool.getDataCenterId(),
pool.getPodId(), pool.getClusterId(), ScopeType.CLUSTER);
- }
- for (StoragePoolVO sp : spes) {
- if (sp.getParent() != pool.getParent() && sp.getId() !=
pool.getParent()) { // If Datastore cluster is tried to prepare for maintenance
then child storage pools are also kept in PrepareForMaintenance mode
- if (sp.getStatus() ==
StoragePoolStatus.PrepareForMaintenance) {
- throw new CloudRuntimeException(String.format("Only
one storage pool in a cluster can be in PrepareForMaintenance mode, %s is
already in PrepareForMaintenance mode ", sp));
- }
- }
- }
- StoragePool storagePool = (StoragePool)store;
-
- //Handeling the Zone wide and cluster wide primay storage
- List<HostVO> hosts = new ArrayList<HostVO>();
- // if the storage scope is ZONE wide, then get all the hosts for
which hypervisor ZWSP created to send Modifystoragepoolcommand
- //TODO: if it's zone wide, this code will list a lot of hosts in
the zone, which may cause performance/OOM issue.
- if (pool.getScope().equals(ScopeType.ZONE)) {
- if (HypervisorType.Any.equals(pool.getHypervisor())) {
- hosts =
_resourceMgr.listAllUpAndEnabledHostsInOneZone(pool.getDataCenterId());
- }
- else {
- hosts =
_resourceMgr.listAllUpAndEnabledHostsInOneZoneByHypervisor(pool.getHypervisor(),
pool.getDataCenterId());
- }
- } else {
- hosts =
_resourceMgr.listHostsInClusterByStatus(pool.getClusterId(), Status.Up);
+ getStoragePoolForSpecification(pool);
+
+ List<HostVO> hosts = getHostsForStoragePool(pool);
+
+ if (setNextStateForMaintenance(hosts, pool) ==
StoragePoolStatus.PrepareForMaintenance) {
+ removeHeartbeatForHostsFromPool(hosts, pool);
+ // check to see if other ps exist
+ // if they do, then we can migrate over the system vms to them
+ // if they don't, then just stop all vms on this one
+ List<StoragePoolVO> upPools =
primaryDataStoreDao.listByStatusInZone(pool.getDataCenterId(),
StoragePoolStatus.Up);
+ boolean restart = !CollectionUtils.isEmpty(upPools);
+
+ // 2. Get a list of all the ROOT volumes within this storage
pool
+ List<VolumeVO> allVolumes =
volumeDao.findByPoolId(pool.getId());
+ // 3. Enqueue to the work queue
+ enqueueMigrationsForVolumes(allVolumes, pool);
+ // 4. Process the queue
+ processMigrationWorkloads(pool, restart);
}
+ } catch (Exception e) {
+ logger.error("Exception in enabling primary storage maintenance:",
e);
+ pool.setStatus(StoragePoolStatus.ErrorInMaintenance);
+ primaryDataStoreDao.update(pool.getId(), pool);
+ // TODO decide on what recovery is possible
+ throw new CloudRuntimeException(e.getMessage());
+ }
+ return true;
+ }
- if (hosts == null || hosts.size() == 0) {
- pool.setStatus(StoragePoolStatus.Maintenance);
- primaryDataStoreDao.update(pool.getId(), pool);
- return true;
- } else {
- // set the pool state to prepare for maintenance
- pool.setStatus(StoragePoolStatus.PrepareForMaintenance);
- primaryDataStoreDao.update(pool.getId(), pool);
- }
- // remove heartbeat
- for (HostVO host : hosts) {
- ModifyStoragePoolCommand cmd = new
ModifyStoragePoolCommand(false, storagePool);
- final Answer answer = agentMgr.easySend(host.getId(), cmd);
- if (answer == null || !answer.getResult()) {
- if (logger.isDebugEnabled()) {
- logger.debug("ModifyStoragePool false failed due to "
+ ((answer == null) ? "answer null" : answer.getDetails()));
- }
- } else {
- if (logger.isDebugEnabled()) {
- logger.debug("ModifyStoragePool false succeeded");
- }
- if (pool.getPoolType() ==
Storage.StoragePoolType.DatastoreCluster) {
- logger.debug("Started synchronising datastore cluster
storage pool {} with vCenter", pool);
-
storageManager.syncDatastoreClusterStoragePool(pool.getId(),
((ModifyStoragePoolAnswer) answer).getDatastoreClusterChildren(), host.getId());
- }
- }
- }
- // check to see if other ps exist
- // if they do, then we can migrate over the system vms to them
- // if they don't, then just stop all vms on this one
- List<StoragePoolVO> upPools =
primaryDataStoreDao.listByStatusInZone(pool.getDataCenterId(),
StoragePoolStatus.Up);
- boolean restart = true;
- if (upPools == null || upPools.size() == 0) {
- restart = false;
- }
+ @Override
+ public boolean cancelMaintain(DataStore store) {
+ // Change the storage state back to up
+ StoragePoolVO poolVO = primaryDataStoreDao.findById(store.getId());
+ StoragePool pool = (StoragePool)store;
- // 2. Get a list of all the ROOT volumes within this storage pool
- List<VolumeVO> allVolumes = volumeDao.findByPoolId(pool.getId());
+ List<HostVO> hosts = getHostsForStoragePool(poolVO);
- // 3. Enqueue to the work queue
- for (VolumeVO volume : allVolumes) {
- VMInstanceVO vmInstance =
vmDao.findById(volume.getInstanceId());
+ if (CollectionUtils.isEmpty(hosts)) {
+ return true;
+ }
- if (vmInstance == null) {
- continue;
- }
+ Pair<Map<String, String>, Boolean> nfsMountOpts =
storageManager.getStoragePoolNFSMountOpts(pool, null);
+ addHeartbeatToHostsInPool(hosts, pool, nfsMountOpts);
- // enqueue sp work
- if (vmInstance.getState().equals(State.Running) ||
vmInstance.getState().equals(State.Starting) ||
vmInstance.getState().equals(State.Stopping)) {
-
- try {
- StoragePoolWorkVO work = new
StoragePoolWorkVO(vmInstance.getId(), pool.getId(), false, false,
server.getId());
- _storagePoolWorkDao.persist(work);
- } catch (Exception e) {
- if (logger.isDebugEnabled()) {
- logger.debug("Work record already exists, re-using
by re-setting values");
- }
- StoragePoolWorkVO work =
_storagePoolWorkDao.findByPoolIdAndVmId(pool.getId(), vmInstance.getId());
- work.setStartedAfterMaintenance(false);
- work.setStoppedForMaintenance(false);
- work.setManagementServerId(server.getId());
- _storagePoolWorkDao.update(work.getId(), work);
- }
- }
+ // 2. Get a list of pending work for this queue
+ List<StoragePoolWorkVO> pendingWork =
_storagePoolWorkDao.listPendingWorkForCancelMaintenanceByPoolId(poolVO.getId());
+
+ // 3. work through the queue
+ cancelMigrationWorkloads(pendingWork);
+ return false;
+ }
+
+ private StoragePoolStatus setNextStateForMaintenance(List<HostVO> hosts,
StoragePoolVO pool) {
+ if (CollectionUtils.isEmpty(hosts)) {
+ pool.setStatus(StoragePoolStatus.Maintenance);
+ primaryDataStoreDao.update(pool.getId(), pool);
+ return StoragePoolStatus.Maintenance;
+ } else {
+ // set the pool state to prepare for maintenance
+ pool.setStatus(StoragePoolStatus.PrepareForMaintenance);
+ primaryDataStoreDao.update(pool.getId(), pool);
+ return StoragePoolStatus.PrepareForMaintenance;
+ }
+ }
+
+ private void processMigrationWorkloads(StoragePoolVO pool, boolean
restart) throws ResourceUnavailableException, OperationTimedoutException,
InsufficientCapacityException {
+ List<StoragePoolWorkVO> pendingWork =
_storagePoolWorkDao.listPendingWorkForPrepareForMaintenanceByPoolId(pool.getId());
+
+ for (StoragePoolWorkVO work : pendingWork) {
+ // shut down the running vms
+ VMInstanceVO vmInstance = vmDao.findById(work.getVmId());
+
+ if (vmInstance == null) {
+ continue;
}
- // 4. Process the queue
- List<StoragePoolWorkVO> pendingWork =
_storagePoolWorkDao.listPendingWorkForPrepareForMaintenanceByPoolId(pool.getId());
+ switch (vmInstance.getType()) {
+ case ConsoleProxy:
+ case SecondaryStorageVm:
+ case DomainRouter:
+ handleVmMigration(restart, work, vmInstance);
+ break;
+ case User:
+ handleStopVmForMigration(work, vmInstance);
+ break;
+ }
+ }
+ }
- for (StoragePoolWorkVO work : pendingWork) {
- // shut down the running vms
+ private void cancelMigrationWorkloads(List<StoragePoolWorkVO> pendingWork)
{
+ for (StoragePoolWorkVO work : pendingWork) {
+ try {
VMInstanceVO vmInstance = vmDao.findById(work.getVmId());
if (vmInstance == null) {
continue;
}
- // if the instance is of type consoleproxy, call the console
- // proxy
- if
(vmInstance.getType().equals(VirtualMachine.Type.ConsoleProxy)) {
- // call the consoleproxymanager
- ConsoleProxyVO consoleProxy =
_consoleProxyDao.findById(vmInstance.getId());
- vmMgr.advanceStop(consoleProxy.getUuid(), false);
- // update work status
- work.setStoppedForMaintenance(true);
- _storagePoolWorkDao.update(work.getId(), work);
+ switch (vmInstance.getType()) {
+ case ConsoleProxy:
+ case SecondaryStorageVm:
+ case DomainRouter:
+ handleVmStart(work, vmInstance);
+ break;
+ case User:
+ handleUserVmStart(work, vmInstance);
+ break;
+ }
+ } catch (Exception e) {
+ logger.debug("Failed start vm", e);
+ throw new CloudRuntimeException(e.toString());
+ }
+ }
+ }
- if (restart) {
+ private void handleStopVmForMigration(StoragePoolWorkVO work, VMInstanceVO
vmInstance) throws ResourceUnavailableException, OperationTimedoutException {
+ vmMgr.advanceStop(vmInstance.getUuid(), false);
+ // update work status
+ work.setStoppedForMaintenance(true);
+ _storagePoolWorkDao.update(work.getId(), work);
+ }
- vmMgr.advanceStart(consoleProxy.getUuid(), null, null);
- // update work status
- work.setStartedAfterMaintenance(true);
- _storagePoolWorkDao.update(work.getId(), work);
- }
- }
+ private void handleVmMigration(boolean restart, StoragePoolWorkVO work,
VMInstanceVO vmInstance) throws ResourceUnavailableException,
OperationTimedoutException, InsufficientCapacityException {
+ handleStopVmForMigration(work, vmInstance);
- // if the instance is of type uservm, call the user vm manager
- if (vmInstance.getType() == VirtualMachine.Type.User) {
- UserVmVO userVm = userVmDao.findById(vmInstance.getId());
- vmMgr.advanceStop(userVm.getUuid(), false);
- // update work status
- work.setStoppedForMaintenance(true);
- _storagePoolWorkDao.update(work.getId(), work);
- }
+ if (restart) {
+ handleVmStart(work, vmInstance);
+ }
+ }
+ private void handleVmStart(StoragePoolWorkVO work, VMInstanceVO
vmInstance) throws InsufficientCapacityException, ResourceUnavailableException,
OperationTimedoutException {
+ vmMgr.advanceStart(vmInstance.getUuid(), null, null);
+ // update work queue
+ work.setStartedAfterMaintenance(true);
+ _storagePoolWorkDao.update(work.getId(), work);
+ }
- // if the instance is of type secondary storage vm, call the
- // secondary storage vm manager
- if
(vmInstance.getType().equals(VirtualMachine.Type.SecondaryStorageVm)) {
- SecondaryStorageVmVO secStrgVm =
_secStrgDao.findById(vmInstance.getId());
- vmMgr.advanceStop(secStrgVm.getUuid(), false);
- // update work status
- work.setStoppedForMaintenance(true);
- _storagePoolWorkDao.update(work.getId(), work);
+ private void enqueueMigrationsForVolumes(List<VolumeVO> allVolumes,
StoragePoolVO pool) {
+ for (VolumeVO volume : allVolumes) {
+ VMInstanceVO vmInstance = vmDao.findById(volume.getInstanceId());
- if (restart) {
- vmMgr.advanceStart(secStrgVm.getUuid(), null, null);
- // update work status
- work.setStartedAfterMaintenance(true);
- _storagePoolWorkDao.update(work.getId(), work);
- }
- }
+ if (vmInstance == null) {
+ continue;
+ }
- // if the instance is of type domain router vm, call the
network
- // manager
- if
(vmInstance.getType().equals(VirtualMachine.Type.DomainRouter)) {
- DomainRouterVO domR =
_domrDao.findById(vmInstance.getId());
- vmMgr.advanceStop(domR.getUuid(), false);
- // update work status
- work.setStoppedForMaintenance(true);
- _storagePoolWorkDao.update(work.getId(), work);
+ // enqueue sp work
+ if (vmInstance.getState().equals(State.Running) ||
vmInstance.getState().equals(State.Starting) ||
vmInstance.getState().equals(State.Stopping)) {
- if (restart) {
- vmMgr.advanceStart(domR.getUuid(), null, null);
- // update work status
- work.setStartedAfterMaintenance(true);
- _storagePoolWorkDao.update(work.getId(), work);
+ try {
+ StoragePoolWorkVO work = new
StoragePoolWorkVO(vmInstance.getId(), pool.getId(), false, false,
server.getId());
+ _storagePoolWorkDao.persist(work);
+ } catch (Exception e) {
+ if (logger.isDebugEnabled()) {
+ logger.debug("Work record already exists, re-using by
re-setting values");
}
+ StoragePoolWorkVO work =
_storagePoolWorkDao.findByPoolIdAndVmId(pool.getId(), vmInstance.getId());
+ work.setStartedAfterMaintenance(false);
+ work.setStoppedForMaintenance(false);
+ work.setManagementServerId(server.getId());
+ _storagePoolWorkDao.update(work.getId(), work);
}
}
- } catch (Exception e) {
- logger.error("Exception in enabling primary storage maintenance:",
e);
- pool.setStatus(StoragePoolStatus.ErrorInMaintenance);
- primaryDataStoreDao.update(pool.getId(), pool);
- throw new CloudRuntimeException(e.getMessage());
}
- return true;
}
- @Override
- public boolean cancelMaintain(DataStore store) {
- // Change the storage state back to up
- Long userId = CallContext.current().getCallingUserId();
- User user = _userDao.findById(userId);
- Account account = CallContext.current().getCallingAccount();
- StoragePoolVO poolVO = primaryDataStoreDao.findById(store.getId());
- StoragePool pool = (StoragePool)store;
+ private void removeHeartbeatForHostsFromPool(List<HostVO> hosts,
StoragePool storagePool) {
+ // remove heartbeat
+ for (HostVO host : hosts) {
+ ModifyStoragePoolCommand cmd = new ModifyStoragePoolCommand(false,
storagePool);
+ final Answer answer = agentMgr.easySend(host.getId(), cmd);
+ if (answer == null || !answer.getResult()) {
+ if (logger.isDebugEnabled()) {
+ logger.debug("ModifyStoragePool false failed due to {}",
((answer == null) ? "answer null" : answer.getDetails()));
+ }
+ } else {
+ reportSucceededModifyStorePool(storagePool,
(ModifyStoragePoolAnswer) answer, host, false);
+ }
+ }
+ }
- //Handeling the Zone wide and cluster wide primay storage
- List<HostVO> hosts = new ArrayList<HostVO>();
- // if the storage scope is ZONE wide, then get all the hosts for which
hypervisor ZWSP created to send Modifystoragepoolcommand
- if (poolVO.getScope().equals(ScopeType.ZONE)) {
+ private void reportSucceededModifyStorePool(StoragePool storagePool,
ModifyStoragePoolAnswer answer, HostVO host, boolean add) {
+ if (logger.isDebugEnabled()) {
+ logger.debug("ModifyStoragePool succeeded for {}", add ? "adding"
: "removing");
+ }
+ if (storagePool.getPoolType() ==
Storage.StoragePoolType.DatastoreCluster) {
+ logger.debug("Started synchronising datastore cluster storage pool
{} with vCenter", storagePool);
+
storageManager.syncDatastoreClusterStoragePool(storagePool.getId(),
answer.getDatastoreClusterChildren(), host.getId());
+ }
+ }
+
+ /**
+ * Handling the Zone wide and cluster wide primary storage
+ * if the storage scope is ZONE wide, then get all the hosts for which
hypervisor ZoneWideStoragePools created to send ModifyStoragePoolCommand
+ * TODO: if it's zone wide, this code will list a lot of hosts in the
zone, which may cause performance/OOM issue.
+ * @param pool pool to check for connected hosts
+ * @return a list of connected hosts
+ */
+ private List<HostVO> getHostsForStoragePool(StoragePoolVO pool) {
+ List<HostVO> hosts;
+ if (pool.getScope().equals(ScopeType.ZONE)) {
if (HypervisorType.Any.equals(pool.getHypervisor())) {
hosts =
_resourceMgr.listAllUpAndEnabledHostsInOneZone(pool.getDataCenterId());
}
else {
- hosts =
_resourceMgr.listAllUpAndEnabledHostsInOneZoneByHypervisor(poolVO.getHypervisor(),
pool.getDataCenterId());
+ hosts =
_resourceMgr.listAllUpAndEnabledHostsInOneZoneByHypervisor(pool.getHypervisor(),
pool.getDataCenterId());
}
} else {
hosts =
_resourceMgr.listHostsInClusterByStatus(pool.getClusterId(), Status.Up);
}
+ return hosts;
+ }
- if (hosts == null || hosts.size() == 0) {
- return true;
+ /**
+ * Handling Zone and Cluster wide storage scopes. Depending on the scope
of the pool, check for other storage pools in the same scope
+ * If the storage is ZONE wide then we pass podId and cluster id as null
as they will be empty for Zone wide storage
+ *
+ * @param pool pool to check for other pools in the same scope
+ */
+ private void getStoragePoolForSpecification(StoragePoolVO pool) {
+ List<StoragePoolVO> storagePools;
+ if (pool.getScope() == ScopeType.ZONE) {
+ storagePools = primaryDataStoreDao.listBy(pool.getDataCenterId(),
null, null, ScopeType.ZONE);
+ } else {
+ storagePools = primaryDataStoreDao.listBy(pool.getDataCenterId(),
pool.getPodId(), pool.getClusterId(), ScopeType.CLUSTER);
}
+ checkHierarchyForPreparingForMaintenance(pool, storagePools);
+ }
- Pair<Map<String, String>, Boolean> nfsMountOpts =
storageManager.getStoragePoolNFSMountOpts(pool, null);
- // add heartbeat
+ /**
+ * If Datastore cluster is tried to prepare for maintenance then child
storage pools are also kept in PrepareForMaintenance mode
+ * @param pool target to put in maintenance
+ * @param storagePools list of possible peers/parents/children
+ */
+ private static void checkHierarchyForPreparingForMaintenance(StoragePoolVO
pool, List<StoragePoolVO> storagePools) {
+ for (StoragePoolVO storagePool : storagePools) {
+ if (!(storagePool.getParent().equals(pool.getParent()) ||
!pool.getParent().equals(storagePool.getId())) &&
+ (storagePool.getStatus() ==
StoragePoolStatus.PrepareForMaintenance)) {
+ throw new CloudRuntimeException(String.format("Only one
storage pool in a cluster can be in PrepareForMaintenance mode, %s is already
in PrepareForMaintenance mode ", storagePool));
Review Comment:
The boolean logic in this condition is complex and hard to understand.
Consider breaking it down into named boolean variables or simplifying the logic
for better readability. Note also that `!(A || !B)` simplifies to `!A && B`,
which inverts the pre-refactor check `sp.getId() != pool.getParent()` — please
verify the intended semantics (exempting hierarchy-related pools, not singling
out the parent) before applying any mechanical simplification.
```suggestion
boolean sameParent =
storagePool.getParent().equals(pool.getParent());
boolean poolIsParentOfStoragePool =
pool.getParent().equals(storagePool.getId());
boolean isOtherPoolInMaintenance = storagePool.getStatus() ==
StoragePoolStatus.PrepareForMaintenance;
// Only one storage pool in a cluster can be in
PrepareForMaintenance mode
if (!(sameParent || !poolIsParentOfStoragePool) &&
isOtherPoolInMaintenance) {
throw new CloudRuntimeException(String.format("Only one
storage pool in a cluster can be in PrepareForMaintenance mode, %s is already
in PrepareForMaintenance mode ", storagePool));
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]