volume-upload: If SSVM is destroyed and started, then partially uploaded volumes/templates remain in inconsistent state (NotUploaded or UploadInProgress) and doesn't transition to any terminal state (Uploaded, Uploaded). As a result these volumes/templates cannot be removed. Fix is to handle such volume/template entries correctly.
Project: http://git-wip-us.apache.org/repos/asf/cloudstack/repo Commit: http://git-wip-us.apache.org/repos/asf/cloudstack/commit/bc399b98 Tree: http://git-wip-us.apache.org/repos/asf/cloudstack/tree/bc399b98 Diff: http://git-wip-us.apache.org/repos/asf/cloudstack/diff/bc399b98 Branch: refs/heads/master Commit: bc399b981f6b9bd7a5471820632e104f6077f7d6 Parents: f172fbe Author: Koushik Das <kous...@apache.org> Authored: Wed Apr 22 19:28:51 2015 +0530 Committer: Koushik Das <kous...@apache.org> Committed: Wed Apr 22 19:28:51 2015 +0530 ---------------------------------------------------------------------- .../storage/image/TemplateServiceImpl.java | 60 +++++++++++++------- .../datastore/ObjectInDataStoreManagerImpl.java | 1 + .../storage/volume/VolumeServiceImpl.java | 51 ++++++++++------- .../storage/ImageStoreUploadMonitorImpl.java | 2 + 4 files changed, 76 insertions(+), 38 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cloudstack/blob/bc399b98/engine/storage/image/src/org/apache/cloudstack/storage/image/TemplateServiceImpl.java ---------------------------------------------------------------------- diff --git a/engine/storage/image/src/org/apache/cloudstack/storage/image/TemplateServiceImpl.java b/engine/storage/image/src/org/apache/cloudstack/storage/image/TemplateServiceImpl.java index c15cda0..f6dc33e 100644 --- a/engine/storage/image/src/org/apache/cloudstack/storage/image/TemplateServiceImpl.java +++ b/engine/storage/image/src/org/apache/cloudstack/storage/image/TemplateServiceImpl.java @@ -29,7 +29,6 @@ import javax.inject.Inject; import org.apache.log4j.Logger; import org.springframework.stereotype.Component; - import org.apache.cloudstack.engine.subsystem.api.storage.CopyCommandResult; import org.apache.cloudstack.engine.subsystem.api.storage.CreateCmdResult; import org.apache.cloudstack.engine.subsystem.api.storage.DataMotionService; @@ -87,12 +86,15 @@ import com.cloud.storage.dao.VMTemplateZoneDao; import com.cloud.storage.template.TemplateConstants; import com.cloud.storage.template.TemplateProp; import com.cloud.template.TemplateManager; +import com.cloud.template.VirtualMachineTemplate; import com.cloud.user.Account; import com.cloud.user.AccountManager; import com.cloud.user.ResourceLimitService; import com.cloud.utils.UriUtils; import com.cloud.utils.db.GlobalLock; import com.cloud.utils.exception.CloudRuntimeException; +import com.cloud.utils.fsm.NoTransitionException; +import com.cloud.utils.fsm.StateMachine2; @Component public class TemplateServiceImpl implements TemplateService { @@ -315,6 +317,7 @@ public class TemplateServiceImpl implements TemplateService { toBeDownloaded.addAll(allTemplates); + final StateMachine2<VirtualMachineTemplate.State, VirtualMachineTemplate.Event, VirtualMachineTemplate> stateMachine = VirtualMachineTemplate.State.getStateMachine(); for (VMTemplateVO tmplt : allTemplates) { String uniqueName = tmplt.getUniqueName(); TemplateDataStoreVO tmpltStore = _vmTemplateStoreDao.findByStoreTemplate(storeId, tmplt.getId()); @@ -330,18 +333,23 @@ public class TemplateServiceImpl implements TemplateService { tmpltStore.setDownloadState(Status.DOWNLOAD_ERROR); String msg = "Template " + tmplt.getName() + ":" + tmplt.getId() + " is corrupted on secondary storage " + tmpltStore.getId(); tmpltStore.setErrorString(msg); - s_logger.info("msg"); - if (tmplt.getUrl() == null) { - msg = - "Private Template (" + tmplt + ") with install path " + tmpltInfo.getInstallPath() + - "is corrupted, please check in image store: " + tmpltStore.getDataStoreId(); + s_logger.info(msg); + if (tmplt.getState() == VirtualMachineTemplate.State.NotUploaded || tmplt.getState() == VirtualMachineTemplate.State.UploadInProgress) { + s_logger.info("Template Sync found " + uniqueName + " on image store " + storeId + " uploaded using SSVM as corrupted, marking it as failed"); + tmpltStore.setState(State.Failed); + try { + stateMachine.transitTo(tmplt, VirtualMachineTemplate.Event.OperationFailed, null, _templateDao); + } catch (NoTransitionException e) { + s_logger.error("Unexpected state transition exception for template " + tmplt.getName() + ". Details: " + e.getMessage()); + } + } else if (tmplt.getUrl() == null) { + msg = "Private template (" + tmplt + ") with install path " + tmpltInfo.getInstallPath() + " is corrupted, please check in image store: " + tmpltStore.getDataStoreId(); s_logger.warn(msg); } else { s_logger.info("Removing template_store_ref entry for corrupted template " + tmplt.getName()); _vmTemplateStoreDao.remove(tmpltStore.getId()); toBeDownloaded.add(tmplt); } - } else { tmpltStore.setDownloadPercent(100); tmpltStore.setDownloadState(Status.DOWNLOADED); @@ -355,6 +363,14 @@ public class TemplateServiceImpl implements TemplateService { tmlpt.setSize(tmpltInfo.getSize()); _templateDao.update(tmplt.getId(), tmlpt); + if (tmplt.getState() == VirtualMachineTemplate.State.NotUploaded || tmplt.getState() == VirtualMachineTemplate.State.UploadInProgress) { + try { + stateMachine.transitTo(tmplt, VirtualMachineTemplate.Event.OperationSucceeded, null, _templateDao); + } catch (NoTransitionException e) { + s_logger.error("Unexpected state transition exception for template " + tmplt.getName() + ". Details: " + e.getMessage()); + } + } + // Skipping limit checks for SYSTEM Account and for the templates created from volumes or snapshots // which already got checked and incremented during createTemplate API call. if (tmpltInfo.getSize() > 0 && tmplt.getAccountId() != Account.ACCOUNT_ID_SYSTEM && tmplt.getUrl() != null) { @@ -374,9 +390,7 @@ public class TemplateServiceImpl implements TemplateService { } _vmTemplateStoreDao.update(tmpltStore.getId(), tmpltStore); } else { - tmpltStore = - new TemplateDataStoreVO(storeId, tmplt.getId(), new Date(), 100, Status.DOWNLOADED, null, null, null, tmpltInfo.getInstallPath(), - tmplt.getUrl()); + tmpltStore = new TemplateDataStoreVO(storeId, tmplt.getId(), new Date(), 100, Status.DOWNLOADED, null, null, null, tmpltInfo.getInstallPath(), tmplt.getUrl()); tmpltStore.setSize(tmpltInfo.getSize()); tmpltStore.setPhysicalSize(tmpltInfo.getPhysicalSize()); tmpltStore.setDataStoreRole(store.getRole()); @@ -387,18 +401,28 @@ public class TemplateServiceImpl implements TemplateService { tmlpt.setSize(tmpltInfo.getSize()); _templateDao.update(tmplt.getId(), tmlpt); associateTemplateToZone(tmplt.getId(), zoneId); - + } + } else if (tmplt.getState() == VirtualMachineTemplate.State.NotUploaded || tmplt.getState() == VirtualMachineTemplate.State.UploadInProgress) { + s_logger.info("Template Sync did not find " + uniqueName + " on image store " + storeId + " uploaded using SSVM, marking it as failed"); + toBeDownloaded.remove(tmplt); + tmpltStore.setDownloadState(Status.DOWNLOAD_ERROR); + String msg = "Template " + tmplt.getName() + ":" + tmplt.getId() + " is corrupted on secondary storage " + tmpltStore.getId(); + tmpltStore.setErrorString(msg); + tmpltStore.setState(State.Failed); + _vmTemplateStoreDao.update(tmpltStore.getId(), tmpltStore); + try { + stateMachine.transitTo(tmplt, VirtualMachineTemplate.Event.OperationFailed, null, _templateDao); + } catch (NoTransitionException e) { + s_logger.error("Unexpected state transition exception for template " + tmplt.getName() + ". Details: " + e.getMessage()); } } else { - s_logger.info("Template Sync did not find " + uniqueName + " on image store " + storeId + - ", may request download based on available hypervisor types"); + s_logger.info("Template Sync did not find " + uniqueName + " on image store " + storeId + ", may request download based on available hypervisor types"); if (tmpltStore != null) { if (_storeMgr.isRegionStore(store) && tmpltStore.getDownloadState() == VMTemplateStorageResourceAssoc.Status.DOWNLOADED && tmpltStore.getState() == State.Ready && tmpltStore.getInstallPath() == null) { s_logger.info("Keep fake entry in template store table for migration of previous NFS to object store"); - } - else { + } else { s_logger.info("Removing leftover template " + uniqueName + " entry from template store table"); // remove those leftover entries _vmTemplateStoreDao.remove(tmpltStore.getId()); @@ -422,7 +446,7 @@ public class TemplateServiceImpl implements TemplateService { availHypers.add(HypervisorType.None); // bug 9809: resume ISO // download. for (VMTemplateVO tmplt : toBeDownloaded) { - if (tmplt.getUrl() == null) { // If url is null we can't + if (tmplt.getUrl() == null) { // If url is null, skip downloading s_logger.info("Skip downloading template " + tmplt.getUniqueName() + " since no url is specified."); continue; } @@ -458,9 +482,7 @@ public class TemplateServiceImpl implements TemplateService { for (String uniqueName : templateInfos.keySet()) { TemplateProp tInfo = templateInfos.get(uniqueName); if (_tmpltMgr.templateIsDeleteable(tInfo.getId())) { - // we cannot directly call deleteTemplateSync here to - // reuse delete logic since in this case, our db does not have - // this template at all. + // we cannot directly call deleteTemplateSync here to reuse delete logic since in this case db does not have this template at all. TemplateObjectTO tmplTO = new TemplateObjectTO(); tmplTO.setDataStore(store.getTO()); tmplTO.setPath(tInfo.getInstallPath()); http://git-wip-us.apache.org/repos/asf/cloudstack/blob/bc399b98/engine/storage/src/org/apache/cloudstack/storage/datastore/ObjectInDataStoreManagerImpl.java ---------------------------------------------------------------------- diff --git a/engine/storage/src/org/apache/cloudstack/storage/datastore/ObjectInDataStoreManagerImpl.java b/engine/storage/src/org/apache/cloudstack/storage/datastore/ObjectInDataStoreManagerImpl.java index 8c92cfc..814f44a 100644 --- a/engine/storage/src/org/apache/cloudstack/storage/datastore/ObjectInDataStoreManagerImpl.java +++ b/engine/storage/src/org/apache/cloudstack/storage/datastore/ObjectInDataStoreManagerImpl.java @@ -88,6 +88,7 @@ public class ObjectInDataStoreManagerImpl implements ObjectInDataStoreManager { stateMachines = new StateMachine2<State, Event, DataObjectInStore>(); stateMachines.addTransition(State.Allocated, Event.CreateOnlyRequested, State.Creating); stateMachines.addTransition(State.Allocated, Event.DestroyRequested, State.Destroying); + stateMachines.addTransition(State.Allocated, Event.OperationFailed, State.Failed); stateMachines.addTransition(State.Creating, Event.OperationFailed, State.Allocated); stateMachines.addTransition(State.Creating, Event.OperationSuccessed, State.Ready); stateMachines.addTransition(State.Ready, Event.CopyingRequested, State.Copying); http://git-wip-us.apache.org/repos/asf/cloudstack/blob/bc399b98/engine/storage/volume/src/org/apache/cloudstack/storage/volume/VolumeServiceImpl.java ---------------------------------------------------------------------- diff --git a/engine/storage/volume/src/org/apache/cloudstack/storage/volume/VolumeServiceImpl.java b/engine/storage/volume/src/org/apache/cloudstack/storage/volume/VolumeServiceImpl.java index b0795ae..436c462 100644 --- a/engine/storage/volume/src/org/apache/cloudstack/storage/volume/VolumeServiceImpl.java +++ b/engine/storage/volume/src/org/apache/cloudstack/storage/volume/VolumeServiceImpl.java @@ -1390,7 +1390,7 @@ public class VolumeServiceImpl implements VolumeService { for (VolumeDataStoreVO volumeStore : dbVolumes) { VolumeVO volume = _volumeDao.findById(volumeStore.getVolumeId()); if (volume == null) { - s_logger.warn("Volume_store_ref shows that volume " + volumeStore.getVolumeId() + " is on image store " + storeId + + s_logger.warn("Volume_store_ref table shows that volume " + volumeStore.getVolumeId() + " is on image store " + storeId + ", but the volume is not found in volumes table, potentially some bugs in deleteVolume, so we just treat this volume to be deleted and mark it as destroyed"); volumeStore.setDestroyed(true); _volumeStoreDao.update(volumeStore.getId(), volumeStore); @@ -1406,20 +1406,23 @@ public class VolumeServiceImpl implements VolumeService { } if (volInfo.isCorrupted()) { volumeStore.setDownloadState(Status.DOWNLOAD_ERROR); - String msg = "Volume " + volume.getUuid() + " is corrupted on image store "; + String msg = "Volume " + volume.getUuid() + " is corrupted on image store"; volumeStore.setErrorString(msg); - s_logger.info("msg"); - if (volumeStore.getDownloadUrl() == null) { - msg = - "Volume (" + volume.getUuid() + ") with install path " + volInfo.getInstallPath() + - "is corrupted, please check in image store: " + volumeStore.getDataStoreId(); + s_logger.info(msg); + if (volume.getState() == State.NotUploaded || volume.getState() == State.UploadInProgress) { + s_logger.info("Volume Sync found " + volume.getUuid() + " uploaded using SSVM on image store " + storeId + " as corrupted, marking it as failed"); + _volumeStoreDao.update(volumeStore.getId(), volumeStore); + // mark volume as failed, so that storage GC will clean it up + VolumeObject volObj = (VolumeObject)volFactory.getVolume(volume.getId()); + volObj.processEvent(Event.OperationFailed); + } else if (volumeStore.getDownloadUrl() == null) { + msg = "Volume (" + volume.getUuid() + ") with install path " + volInfo.getInstallPath() + " is corrupted, please check in image store: " + volumeStore.getDataStoreId(); s_logger.warn(msg); } else { s_logger.info("Removing volume_store_ref entry for corrupted volume " + volume.getName()); _volumeStoreDao.remove(volumeStore.getId()); toBeDownloaded.add(volumeStore); } - } else { // Put them in right status volumeStore.setDownloadPercent(100); volumeStore.setDownloadState(Status.DOWNLOADED); @@ -1436,15 +1439,18 @@ public class VolumeServiceImpl implements VolumeService { _volumeDao.update(volumeStore.getVolumeId(), volume); } + if (volume.getState() == State.NotUploaded || volume.getState() == State.UploadInProgress) { + VolumeObject volObj = (VolumeObject)volFactory.getVolume(volume.getId()); + volObj.processEvent(Event.OperationSuccessed); + } + if (volInfo.getSize() > 0) { try { _resourceLimitMgr.checkResourceLimit(_accountMgr.getAccount(volume.getAccountId()), com.cloud.configuration.Resource.ResourceType.secondary_storage, volInfo.getSize() - volInfo.getPhysicalSize()); } catch (ResourceAllocationException e) { s_logger.warn(e.getMessage()); - _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_RESOURCE_LIMIT_EXCEEDED, volume.getDataCenterId(), volume.getPodId(), - e.getMessage(), - e.getMessage()); + _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_RESOURCE_LIMIT_EXCEEDED, volume.getDataCenterId(), volume.getPodId(), e.getMessage(), e.getMessage()); } finally { _resourceLimitMgr.recalculateResourceCount(volume.getAccountId(), volume.getDomainId(), com.cloud.configuration.Resource.ResourceType.secondary_storage.getOrdinal()); @@ -1452,11 +1458,21 @@ public class VolumeServiceImpl implements VolumeService { } } continue; + } else if (volume.getState() == State.NotUploaded || volume.getState() == State.UploadInProgress) { // failed uploads through SSVM + s_logger.info("Volume Sync did not find " + volume.getUuid() + " uploaded using SSVM on image store " + storeId + ", marking it as failed"); + toBeDownloaded.remove(volumeStore); + volumeStore.setDownloadState(Status.DOWNLOAD_ERROR); + String msg = "Volume " + volume.getUuid() + " is corrupted on image store"; + volumeStore.setErrorString(msg); + _volumeStoreDao.update(volumeStore.getId(), volumeStore); + // mark volume as failed, so that storage GC will clean it up + VolumeObject volObj = (VolumeObject)volFactory.getVolume(volume.getId()); + volObj.processEvent(Event.OperationFailed); + continue; } // Volume is not on secondary but we should download. if (volumeStore.getDownloadState() != Status.DOWNLOADED) { - s_logger.info("Volume Sync did not find " + volume.getName() + " ready on image store " + storeId + - ", will request download to start/resume shortly"); + s_logger.info("Volume Sync did not find " + volume.getName() + " ready on image store " + storeId + ", will request download to start/resume shortly"); toBeDownloaded.add(volumeStore); } } @@ -1464,7 +1480,7 @@ public class VolumeServiceImpl implements VolumeService { // Download volumes which haven't been downloaded yet. if (toBeDownloaded.size() > 0) { for (VolumeDataStoreVO volumeHost : toBeDownloaded) { - if (volumeHost.getDownloadUrl() == null) { // If url is null we + if (volumeHost.getDownloadUrl() == null) { // If url is null, skip downloading s_logger.info("Skip downloading volume " + volumeHost.getVolumeId() + " since no download url is specified."); continue; } @@ -1472,8 +1488,7 @@ public class VolumeServiceImpl implements VolumeService { // if this is a region store, and there is already an DOWNLOADED entry there without install_path information, which // means that this is a duplicate entry from migration of previous NFS to staging. if (store.getScope().getScopeType() == ScopeType.REGION) { - if (volumeHost.getDownloadState() == VMTemplateStorageResourceAssoc.Status.DOWNLOADED - && volumeHost.getInstallPath() == null) { + if (volumeHost.getDownloadState() == VMTemplateStorageResourceAssoc.Status.DOWNLOADED && volumeHost.getInstallPath() == null) { s_logger.info("Skip sync volume for migration of previous NFS to object store"); continue; } @@ -1493,9 +1508,7 @@ public class VolumeServiceImpl implements VolumeService { Long uniqueName = entry.getKey(); TemplateProp tInfo = entry.getValue(); - //we cannot directly call expungeVolumeAsync here to - // reuse delete logic since in this case, our db does not have - // this template at all. + // we cannot directly call expungeVolumeAsync here to reuse delete logic since in this case db does not have this volume at all. VolumeObjectTO tmplTO = new VolumeObjectTO(); tmplTO.setDataStore(store.getTO()); tmplTO.setPath(tInfo.getInstallPath()); http://git-wip-us.apache.org/repos/asf/cloudstack/blob/bc399b98/server/src/com/cloud/storage/ImageStoreUploadMonitorImpl.java ---------------------------------------------------------------------- diff --git a/server/src/com/cloud/storage/ImageStoreUploadMonitorImpl.java b/server/src/com/cloud/storage/ImageStoreUploadMonitorImpl.java index 84b33d2..167f19f 100755 --- a/server/src/com/cloud/storage/ImageStoreUploadMonitorImpl.java +++ b/server/src/com/cloud/storage/ImageStoreUploadMonitorImpl.java @@ -303,6 +303,7 @@ public class ImageStoreUploadMonitorImpl extends ManagerBase implements ImageSto } else if (tmpVolume.getState() == Volume.State.UploadInProgress) { // check for timeout if (System.currentTimeMillis() - tmpVolumeDataStore.getCreated().getTime() > _uploadOperationTimeout) { tmpVolumeDataStore.setDownloadState(VMTemplateStorageResourceAssoc.Status.DOWNLOAD_ERROR); + tmpVolumeDataStore.setState(State.Failed); stateMachine.transitTo(tmpVolume, Event.OperationFailed, null, _volumeDao); if (s_logger.isDebugEnabled()) { s_logger.debug("Volume " + tmpVolume.getUuid() + " failed to upload due to operation timed out"); @@ -377,6 +378,7 @@ public class ImageStoreUploadMonitorImpl extends ManagerBase implements ImageSto } else if (tmpTemplate.getState() == VirtualMachineTemplate.State.UploadInProgress) { // check for timeout if (System.currentTimeMillis() - tmpTemplateDataStore.getCreated().getTime() > _uploadOperationTimeout) { tmpTemplateDataStore.setDownloadState(VMTemplateStorageResourceAssoc.Status.DOWNLOAD_ERROR); + tmpTemplateDataStore.setState(State.Failed); stateMachine.transitTo(tmpTemplate, VirtualMachineTemplate.Event.OperationFailed, null, _templateDao); if (s_logger.isDebugEnabled()) { s_logger.debug("Template " + tmpTemplate.getUuid() + " failed to upload due to operation timed out");