This is an automated email from the ASF dual-hosted git repository.
dahn pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudstack.git
The following commit(s) were added to refs/heads/main by this push:
new c9f1c5790d1 Fix snapshot scheduling with expired jobs (#8832)
c9f1c5790d1 is described below
commit c9f1c5790d131b744fb16cc417c7f9540d7c604d
Author: Henrique Sato <[email protected]>
AuthorDate: Fri Aug 23 06:19:13 2024 -0300
Fix snapshot scheduling with expired jobs (#8832)
Co-authored-by: Henrique Sato <[email protected]>
---
.../java/com/cloud/storage/SnapshotScheduleVO.java | 9 +++
.../com/cloud/storage/dao/SnapshotScheduleDao.java | 6 +-
.../cloud/storage/dao/SnapshotScheduleDaoImpl.java | 35 +++-----
.../resources/META-INF/db/schema-41910to42000.sql | 3 +
.../storage/snapshot/SnapshotSchedulerImpl.java | 94 +++++++---------------
.../snapshot/SnapshotSchedulerImplTest.java | 59 ++++++++++++++
6 files changed, 113 insertions(+), 93 deletions(-)
diff --git
a/engine/schema/src/main/java/com/cloud/storage/SnapshotScheduleVO.java
b/engine/schema/src/main/java/com/cloud/storage/SnapshotScheduleVO.java
index 80a890aacad..86e0da53666 100644
--- a/engine/schema/src/main/java/com/cloud/storage/SnapshotScheduleVO.java
+++ b/engine/schema/src/main/java/com/cloud/storage/SnapshotScheduleVO.java
@@ -29,6 +29,8 @@ import javax.persistence.Temporal;
import javax.persistence.TemporalType;
import com.cloud.storage.snapshot.SnapshotSchedule;
+import org.apache.commons.lang3.builder.ReflectionToStringBuilder;
+import org.apache.commons.lang3.builder.ToStringStyle;
@Entity
@Table(name = "snapshot_schedule")
@@ -132,4 +134,11 @@ public class SnapshotScheduleVO implements
SnapshotSchedule {
public void setUuid(String uuid) {
this.uuid = uuid;
}
+
+ @Override
+ public String toString() {
+ ReflectionToStringBuilder reflectionToStringBuilder = new
ReflectionToStringBuilder(this, ToStringStyle.JSON_STYLE);
+ reflectionToStringBuilder.setExcludeFieldNames("id");
+ return reflectionToStringBuilder.toString();
+ }
}
diff --git
a/engine/schema/src/main/java/com/cloud/storage/dao/SnapshotScheduleDao.java
b/engine/schema/src/main/java/com/cloud/storage/dao/SnapshotScheduleDao.java
index 7ca0a3915f5..284a42cf9e1 100644
--- a/engine/schema/src/main/java/com/cloud/storage/dao/SnapshotScheduleDao.java
+++ b/engine/schema/src/main/java/com/cloud/storage/dao/SnapshotScheduleDao.java
@@ -27,13 +27,11 @@ import com.cloud.utils.db.GenericDao;
*/
public interface SnapshotScheduleDao extends GenericDao<SnapshotScheduleVO,
Long> {
- List<SnapshotScheduleVO> getCoincidingSnapshotSchedules(long volumeId,
Date date);
-
List<SnapshotScheduleVO> getSchedulesToExecute(Date currentTimestamp);
- SnapshotScheduleVO getCurrentSchedule(Long volumeId, Long policyId,
boolean executing);
+ List<SnapshotScheduleVO> getSchedulesAssignedWithAsyncJob();
- SnapshotScheduleVO findOneByVolume(long volumeId);
+ SnapshotScheduleVO getCurrentSchedule(Long volumeId, Long policyId,
boolean executing);
SnapshotScheduleVO findOneByVolumePolicy(long volumeId, long policyId);
diff --git
a/engine/schema/src/main/java/com/cloud/storage/dao/SnapshotScheduleDaoImpl.java
b/engine/schema/src/main/java/com/cloud/storage/dao/SnapshotScheduleDaoImpl.java
index 925d02dd90b..14669ce1d43 100644
---
a/engine/schema/src/main/java/com/cloud/storage/dao/SnapshotScheduleDaoImpl.java
+++
b/engine/schema/src/main/java/com/cloud/storage/dao/SnapshotScheduleDaoImpl.java
@@ -32,7 +32,7 @@ import com.cloud.utils.db.SearchCriteria;
public class SnapshotScheduleDaoImpl extends
GenericDaoBase<SnapshotScheduleVO, Long> implements SnapshotScheduleDao {
protected final SearchBuilder<SnapshotScheduleVO>
executableSchedulesSearch;
protected final SearchBuilder<SnapshotScheduleVO>
coincidingSchedulesSearch;
- private final SearchBuilder<SnapshotScheduleVO> VolumeIdSearch;
+ protected final SearchBuilder<SnapshotScheduleVO>
schedulesAssignedWithAsyncJob;
private final SearchBuilder<SnapshotScheduleVO> VolumeIdPolicyIdSearch;
protected SnapshotScheduleDaoImpl() {
@@ -48,36 +48,14 @@ public class SnapshotScheduleDaoImpl extends
GenericDaoBase<SnapshotScheduleVO,
coincidingSchedulesSearch.and("asyncJobId",
coincidingSchedulesSearch.entity().getAsyncJobId(), SearchCriteria.Op.NULL);
coincidingSchedulesSearch.done();
- VolumeIdSearch = createSearchBuilder();
- VolumeIdSearch.and("volumeId", VolumeIdSearch.entity().getVolumeId(),
SearchCriteria.Op.EQ);
- VolumeIdSearch.done();
-
VolumeIdPolicyIdSearch = createSearchBuilder();
VolumeIdPolicyIdSearch.and("volumeId",
VolumeIdPolicyIdSearch.entity().getVolumeId(), SearchCriteria.Op.EQ);
VolumeIdPolicyIdSearch.and("policyId",
VolumeIdPolicyIdSearch.entity().getPolicyId(), SearchCriteria.Op.EQ);
VolumeIdPolicyIdSearch.done();
- }
-
- /**
- * {@inheritDoc}
- */
- @Override
- public List<SnapshotScheduleVO> getCoincidingSnapshotSchedules(long
volumeId, Date date) {
- SearchCriteria<SnapshotScheduleVO> sc =
coincidingSchedulesSearch.create();
- sc.setParameters("volumeId", volumeId);
- sc.setParameters("scheduledTimestamp", date);
- // Don't return manual snapshots. They will be executed through another
- // code path.
- sc.addAnd("policyId", SearchCriteria.Op.NEQ, 1L);
- return listBy(sc);
- }
-
- @Override
- public SnapshotScheduleVO findOneByVolume(long volumeId) {
- SearchCriteria<SnapshotScheduleVO> sc = VolumeIdSearch.create();
- sc.setParameters("volumeId", volumeId);
- return findOneBy(sc);
+ schedulesAssignedWithAsyncJob = createSearchBuilder();
+ schedulesAssignedWithAsyncJob.and("asyncJobId",
schedulesAssignedWithAsyncJob.entity().getAsyncJobId(),
SearchCriteria.Op.NNULL);
+ schedulesAssignedWithAsyncJob.done();
}
@Override
@@ -98,6 +76,11 @@ public class SnapshotScheduleDaoImpl extends
GenericDaoBase<SnapshotScheduleVO,
return listBy(sc);
}
+ @Override
+ public List<SnapshotScheduleVO> getSchedulesAssignedWithAsyncJob() {
+ return listBy(schedulesAssignedWithAsyncJob.create());
+ }
+
/**
* {@inheritDoc}
*/
diff --git
a/engine/schema/src/main/resources/META-INF/db/schema-41910to42000.sql
b/engine/schema/src/main/resources/META-INF/db/schema-41910to42000.sql
index 2bcf41a4042..f7c78670ddd 100644
--- a/engine/schema/src/main/resources/META-INF/db/schema-41910to42000.sql
+++ b/engine/schema/src/main/resources/META-INF/db/schema-41910to42000.sql
@@ -90,6 +90,9 @@ CREATE TABLE IF NOT EXISTS
`cloud_usage`.`quota_email_configuration`(
CONSTRAINT `FK_quota_email_configuration_account_id` FOREIGN KEY
(`account_id`) REFERENCES `cloud_usage`.`quota_account`(`account_id`),
CONSTRAINT `FK_quota_email_configuration_email_template_id` FOREIGN KEY
(`email_template_id`) REFERENCES `cloud_usage`.`quota_email_templates`(`id`));
+-- Remove on delete cascade from snapshot schedule
+ALTER TABLE `cloud`.`snapshot_schedule` DROP CONSTRAINT
`fk__snapshot_schedule_async_job_id`;
+
-- Add `is_implicit` column to `host_tags` table
CALL `cloud`.`IDEMPOTENT_ADD_COLUMN`('cloud.host_tags', 'is_implicit', 'int(1)
UNSIGNED NOT NULL DEFAULT 0 COMMENT "If host tag is implicit or explicit" ');
diff --git
a/server/src/main/java/com/cloud/storage/snapshot/SnapshotSchedulerImpl.java
b/server/src/main/java/com/cloud/storage/snapshot/SnapshotSchedulerImpl.java
index 29955066062..2a53021636c 100644
--- a/server/src/main/java/com/cloud/storage/snapshot/SnapshotSchedulerImpl.java
+++ b/server/src/main/java/com/cloud/storage/snapshot/SnapshotSchedulerImpl.java
@@ -35,6 +35,7 @@ import
org.apache.cloudstack.framework.jobs.AsyncJobDispatcher;
import org.apache.cloudstack.framework.jobs.AsyncJobManager;
import org.apache.cloudstack.framework.jobs.dao.AsyncJobDao;
import org.apache.cloudstack.framework.jobs.impl.AsyncJobVO;
+import org.apache.cloudstack.jobs.JobInfo;
import org.apache.cloudstack.managed.context.ManagedContextTimerTask;
import org.springframework.stereotype.Component;
@@ -47,7 +48,6 @@ import com.cloud.server.TaggedResourceService;
import com.cloud.storage.Snapshot;
import com.cloud.storage.SnapshotPolicyVO;
import com.cloud.storage.SnapshotScheduleVO;
-import com.cloud.storage.SnapshotVO;
import com.cloud.storage.VolumeVO;
import com.cloud.storage.dao.SnapshotDao;
import com.cloud.storage.dao.SnapshotPolicyDao;
@@ -64,7 +64,6 @@ import com.cloud.utils.component.ManagerBase;
import com.cloud.utils.concurrency.TestClock;
import com.cloud.utils.db.DB;
import com.cloud.utils.db.GlobalLock;
-import com.cloud.utils.db.SearchCriteria;
import com.cloud.utils.db.TransactionLegacy;
import com.cloud.vm.snapshot.VMSnapshotManager;
import com.cloud.vm.snapshot.VMSnapshotVO;
@@ -144,7 +143,7 @@ public class SnapshotSchedulerImpl extends ManagerBase
implements SnapshotSchedu
try {
if (scanLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_COOPERATION)) {
try {
- checkStatusOfCurrentlyExecutingSnapshots();
+ scheduleNextSnapshotJobsIfNecessary();
} finally {
scanLock.unlock();
}
@@ -174,68 +173,37 @@ public class SnapshotSchedulerImpl extends ManagerBase
implements SnapshotSchedu
}
}
- private void checkStatusOfCurrentlyExecutingSnapshots() {
- final SearchCriteria<SnapshotScheduleVO> sc =
_snapshotScheduleDao.createSearchCriteria();
- sc.addAnd("asyncJobId", SearchCriteria.Op.NNULL);
- final List<SnapshotScheduleVO> snapshotSchedules =
_snapshotScheduleDao.search(sc, null);
- for (final SnapshotScheduleVO snapshotSchedule : snapshotSchedules) {
- final Long asyncJobId = snapshotSchedule.getAsyncJobId();
- final AsyncJobVO asyncJob =
_asyncJobDao.findByIdIncludingRemoved(asyncJobId);
- switch (asyncJob.getStatus()) {
- case SUCCEEDED:
- // The snapshot has been successfully backed up.
- // The snapshot state has also been cleaned up.
- // We can schedule the next job for this snapshot.
- // Remove the existing entry in the snapshot_schedule
table.
- scheduleNextSnapshotJob(snapshotSchedule);
- break;
- case FAILED:
- // Check the snapshot status.
- final Long snapshotId = snapshotSchedule.getSnapshotId();
- if (snapshotId == null) {
- // createSnapshotAsync exited, successfully or
unsuccessfully,
- // even before creating a snapshot record
- // No cleanup needs to be done.
- // Schedule the next snapshot.
- scheduleNextSnapshotJob(snapshotSchedule);
- } else {
- final SnapshotVO snapshot =
_snapshotDao.findById(snapshotId);
- if (snapshot == null || snapshot.getRemoved() != null)
{
- // This snapshot has been deleted successfully
from the primary storage
- // Again no cleanup needs to be done.
- // Schedule the next snapshot.
- // There's very little probability that the code
reaches this point.
- // The snapshotId is a foreign key for the
snapshot_schedule table
- // set to ON DELETE CASCADE. So if the snapshot
entry is deleted, the snapshot_schedule entry will be too.
- // But what if it has only been marked as removed?
- scheduleNextSnapshotJob(snapshotSchedule);
- } else {
- // The management server executing this snapshot
job appears to have crashed
- // while creating the snapshot on primary
storage/or backing it up.
- // We have no idea whether the snapshot was
successfully taken on the primary or not.
- // Schedule the next snapshot job.
- // The ValidatePreviousSnapshotCommand will take
appropriate action on this snapshot
- // If the snapshot was taken successfully on
primary, it will retry backing it up.
- // and cleanup the previous snapshot
- // Set the userId to that of system.
- //_snapshotManager.validateSnapshot(1L, snapshot);
- // In all cases, schedule the next snapshot job
- scheduleNextSnapshotJob(snapshotSchedule);
- }
- }
+ private void scheduleNextSnapshotJobsIfNecessary() {
+ List<SnapshotScheduleVO> snapshotSchedules =
_snapshotScheduleDao.getSchedulesAssignedWithAsyncJob();
+ logger.info("Verifying the current state of [{}] snapshot schedules
and scheduling next jobs, if necessary.", snapshotSchedules.size());
+ for (SnapshotScheduleVO snapshotSchedule : snapshotSchedules) {
+ scheduleNextSnapshotJobIfNecessary(snapshotSchedule);
+ }
+ }
- break;
- case IN_PROGRESS:
- // There is no way of knowing from here whether
- // 1) Another management server is processing this
snapshot job
- // 2) The management server has crashed and this snapshot
is lying
- // around in an inconsistent state.
- // Hopefully, this can be resolved at the backend when the
current snapshot gets executed.
- // But if it remains in this state, the current snapshot
will not get executed.
- // And it will remain in stasis.
- break;
- }
+ protected void scheduleNextSnapshotJobIfNecessary(SnapshotScheduleVO
snapshotSchedule) {
+ Long asyncJobId = snapshotSchedule.getAsyncJobId();
+ AsyncJobVO asyncJob =
_asyncJobDao.findByIdIncludingRemoved(asyncJobId);
+
+ if (asyncJob == null) {
+ logger.debug("The async job [{}] of snapshot schedule [{}] does
not exist anymore. Considering it as finished and scheduling the next snapshot
job.",
+ asyncJobId, snapshotSchedule);
+ scheduleNextSnapshotJob(snapshotSchedule);
+ return;
}
+
+ JobInfo.Status status = asyncJob.getStatus();
+
+ if (JobInfo.Status.SUCCEEDED.equals(status)) {
+ logger.debug("Last job of schedule [{}] succeeded; scheduling the
next snapshot job.", snapshotSchedule);
+ } else if (JobInfo.Status.FAILED.equals(status)) {
+ logger.debug("Last job of schedule [{}] failed with [{}];
scheduling a new snapshot job.", snapshotSchedule, asyncJob.getResult());
+ } else {
+ logger.debug("Schedule [{}] is still in progress, skipping next
job scheduling.", snapshotSchedule);
+ return;
+ }
+
+ scheduleNextSnapshotJob(snapshotSchedule);
}
@DB
diff --git
a/server/src/test/java/com/cloud/storage/snapshot/SnapshotSchedulerImplTest.java
b/server/src/test/java/com/cloud/storage/snapshot/SnapshotSchedulerImplTest.java
index 971af289ef7..3827531891f 100644
---
a/server/src/test/java/com/cloud/storage/snapshot/SnapshotSchedulerImplTest.java
+++
b/server/src/test/java/com/cloud/storage/snapshot/SnapshotSchedulerImplTest.java
@@ -26,6 +26,9 @@ import com.cloud.storage.dao.VolumeDao;
import com.cloud.user.Account;
import com.cloud.user.AccountVO;
import com.cloud.user.dao.AccountDao;
+import org.apache.cloudstack.framework.jobs.dao.AsyncJobDao;
+import org.apache.cloudstack.framework.jobs.impl.AsyncJobVO;
+import org.apache.cloudstack.jobs.JobInfo;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
@@ -65,6 +68,16 @@ public class SnapshotSchedulerImplTest {
@Mock
AccountVO accountVoMock;
+ @Mock
+ private SnapshotScheduleVO snapshotScheduleVoMock;
+
+ @Mock
+ private AsyncJobDao asyncJobDaoMock;
+
+ @Mock
+ private AsyncJobVO asyncJobVoMock;
+
+
@Test
public void scheduleNextSnapshotJobTestParameterIsNullReturnNull() {
SnapshotScheduleVO snapshotScheduleVO = null;
@@ -215,4 +228,50 @@ public class SnapshotSchedulerImplTest {
Mockito.verify(snapshotScheduleDaoMock,
Mockito.never()).remove(Mockito.anyLong());
}
+
+ @Test
+ public void
scheduleNextSnapshotJobIfNecessaryTestAsyncJobIsNullThenScheduleNextSnapshot() {
+ Mockito.doReturn(1L).when(snapshotScheduleVoMock).getAsyncJobId();
+
Mockito.doReturn(null).when(asyncJobDaoMock).findByIdIncludingRemoved(Mockito.any());
+ Mockito.doReturn(new
Date()).when(snapshotSchedulerImplSpy).scheduleNextSnapshotJob(Mockito.any(SnapshotScheduleVO.class));
+
+
snapshotSchedulerImplSpy.scheduleNextSnapshotJobIfNecessary(snapshotScheduleVoMock);
+
+
Mockito.verify(snapshotSchedulerImplSpy).scheduleNextSnapshotJob(Mockito.any(SnapshotScheduleVO.class));
+ }
+
+ @Test
+ public void
scheduleNextSnapshotJobIfNecessaryTestAsyncJobSucceededThenScheduleNextSnapshot()
{
+ Mockito.doReturn(1L).when(snapshotScheduleVoMock).getAsyncJobId();
+
Mockito.doReturn(asyncJobVoMock).when(asyncJobDaoMock).findByIdIncludingRemoved(Mockito.any());
+
Mockito.doReturn(JobInfo.Status.SUCCEEDED).when(asyncJobVoMock).getStatus();
+ Mockito.doReturn(new
Date()).when(snapshotSchedulerImplSpy).scheduleNextSnapshotJob(Mockito.any(SnapshotScheduleVO.class));
+
+
snapshotSchedulerImplSpy.scheduleNextSnapshotJobIfNecessary(snapshotScheduleVoMock);
+
+
Mockito.verify(snapshotSchedulerImplSpy).scheduleNextSnapshotJob(Mockito.any(SnapshotScheduleVO.class));
+ }
+
+ @Test
+ public void
scheduleNextSnapshotJobIfNecessaryTestAsyncJobFailedThenScheduleNextSnapshot() {
+ Mockito.doReturn(1L).when(snapshotScheduleVoMock).getAsyncJobId();
+
Mockito.doReturn(asyncJobVoMock).when(asyncJobDaoMock).findByIdIncludingRemoved(Mockito.any());
+
Mockito.doReturn(JobInfo.Status.FAILED).when(asyncJobVoMock).getStatus();
+ Mockito.doReturn(new
Date()).when(snapshotSchedulerImplSpy).scheduleNextSnapshotJob(Mockito.any(SnapshotScheduleVO.class));
+
+
snapshotSchedulerImplSpy.scheduleNextSnapshotJobIfNecessary(snapshotScheduleVoMock);
+
+
Mockito.verify(snapshotSchedulerImplSpy).scheduleNextSnapshotJob(Mockito.any(SnapshotScheduleVO.class));
+ }
+
+ @Test
+ public void
scheduleNextSnapshotJobIfNecessaryTestAsyncJobInProgressThenDoNothing() {
+ Mockito.doReturn(1L).when(snapshotScheduleVoMock).getAsyncJobId();
+
Mockito.doReturn(asyncJobVoMock).when(asyncJobDaoMock).findByIdIncludingRemoved(Mockito.any());
+
Mockito.doReturn(JobInfo.Status.IN_PROGRESS).when(asyncJobVoMock).getStatus();
+
+
snapshotSchedulerImplSpy.scheduleNextSnapshotJobIfNecessary(snapshotScheduleVoMock);
+
+ Mockito.verify(snapshotSchedulerImplSpy,
Mockito.never()).scheduleNextSnapshotJob(Mockito.any(SnapshotScheduleVO.class));
+ }
}