CLOUDSTACK-7864: CPVM remains in the Stopped state after it fails to start because of a management server restart. Added an optimization to purge queue items for cancelled jobs.
Project: http://git-wip-us.apache.org/repos/asf/cloudstack/repo
Commit: http://git-wip-us.apache.org/repos/asf/cloudstack/commit/0c45c96e
Tree: http://git-wip-us.apache.org/repos/asf/cloudstack/tree/0c45c96e
Diff: http://git-wip-us.apache.org/repos/asf/cloudstack/diff/0c45c96e

Branch: refs/heads/master
Commit: 0c45c96ec718edf29ba55fea7dbfda04cc3495e5
Parents: f2cedda
Author: Min Chen <min.c...@citrix.com>
Authored: Tue Nov 11 10:38:46 2014 -0800
Committer: Min Chen <min.c...@citrix.com>
Committed: Tue Nov 11 11:11:18 2014 -0800

----------------------------------------------------------------------
 .../framework/jobs/impl/AsyncJobManagerImpl.java | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/cloudstack/blob/0c45c96e/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java
----------------------------------------------------------------------
diff --git a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java
index 0e00a88..2ba5d1e 100644
--- a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java
+++ b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java
@@ -981,13 +981,20 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
                     // purge sync queue item running on this ms node
                     _queueMgr.cleanupActiveQueueItems(msid, true);
                     // reset job status for all jobs running on this ms node
-                    _jobDao.resetJobProcess(msid, ApiErrorCode.INTERNAL_ERROR.getHttpCode(), "job cancelled because of management server restart or shutdown");
-                    // purge those queue items for those cancelled jobs above, which may not be picked up by any MS node yet
-                    List<AsyncJobVO> cancelJobs = _jobDao.getResetJobs(msid);
-                    for (AsyncJobVO job : cancelJobs){
+                    List<AsyncJobVO> jobs = _jobDao.getResetJobs(msid);
+                    for (AsyncJobVO job : jobs) {
+                        if (s_logger.isDebugEnabled()) {
+                            s_logger.debug("Cancel left-over job-" + job.getId());
+                        }
+                        job.setStatus(JobInfo.Status.FAILED);
+                        job.setResultCode(ApiErrorCode.INTERNAL_ERROR.getHttpCode());
+                        job.setResult("job cancelled because of management server restart or shutdown");
+                        _jobDao.update(job.getId(), job);
+                        if (s_logger.isDebugEnabled()) {
+                            s_logger.debug("Purge queue item for cancelled job-" + job.getId());
+                        }
                         _queueMgr.purgeAsyncJobQueueItemId(job.getId());
                     }
-
                 }
             });
         } catch (Throwable e) {