This is an automated email from the ASF dual-hosted git repository.

dahn pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudstack.git


The following commit(s) were added to refs/heads/main by this push:
     new c9f1c5790d1 Fix snapshot scheduling with expired jobs (#8832)
c9f1c5790d1 is described below

commit c9f1c5790d131b744fb16cc417c7f9540d7c604d
Author: Henrique Sato <[email protected]>
AuthorDate: Fri Aug 23 06:19:13 2024 -0300

    Fix snapshot scheduling with expired jobs (#8832)
    
    Co-authored-by: Henrique Sato <[email protected]>
---
 .../java/com/cloud/storage/SnapshotScheduleVO.java |  9 +++
 .../com/cloud/storage/dao/SnapshotScheduleDao.java |  6 +-
 .../cloud/storage/dao/SnapshotScheduleDaoImpl.java | 35 +++-----
 .../resources/META-INF/db/schema-41910to42000.sql  |  3 +
 .../storage/snapshot/SnapshotSchedulerImpl.java    | 94 +++++++---------------
 .../snapshot/SnapshotSchedulerImplTest.java        | 59 ++++++++++++++
 6 files changed, 113 insertions(+), 93 deletions(-)

diff --git 
a/engine/schema/src/main/java/com/cloud/storage/SnapshotScheduleVO.java 
b/engine/schema/src/main/java/com/cloud/storage/SnapshotScheduleVO.java
index 80a890aacad..86e0da53666 100644
--- a/engine/schema/src/main/java/com/cloud/storage/SnapshotScheduleVO.java
+++ b/engine/schema/src/main/java/com/cloud/storage/SnapshotScheduleVO.java
@@ -29,6 +29,8 @@ import javax.persistence.Temporal;
 import javax.persistence.TemporalType;
 
 import com.cloud.storage.snapshot.SnapshotSchedule;
+import org.apache.commons.lang3.builder.ReflectionToStringBuilder;
+import org.apache.commons.lang3.builder.ToStringStyle;
 
 @Entity
 @Table(name = "snapshot_schedule")
@@ -132,4 +134,11 @@ public class SnapshotScheduleVO implements 
SnapshotSchedule {
     public void setUuid(String uuid) {
         this.uuid = uuid;
     }
+
+    @Override
+    public String toString() {
+        ReflectionToStringBuilder reflectionToStringBuilder = new 
ReflectionToStringBuilder(this, ToStringStyle.JSON_STYLE);
+        reflectionToStringBuilder.setExcludeFieldNames("id");
+        return reflectionToStringBuilder.toString();
+    }
 }
diff --git 
a/engine/schema/src/main/java/com/cloud/storage/dao/SnapshotScheduleDao.java 
b/engine/schema/src/main/java/com/cloud/storage/dao/SnapshotScheduleDao.java
index 7ca0a3915f5..284a42cf9e1 100644
--- a/engine/schema/src/main/java/com/cloud/storage/dao/SnapshotScheduleDao.java
+++ b/engine/schema/src/main/java/com/cloud/storage/dao/SnapshotScheduleDao.java
@@ -27,13 +27,11 @@ import com.cloud.utils.db.GenericDao;
  */
 public interface SnapshotScheduleDao extends GenericDao<SnapshotScheduleVO, 
Long> {
 
-    List<SnapshotScheduleVO> getCoincidingSnapshotSchedules(long volumeId, 
Date date);
-
     List<SnapshotScheduleVO> getSchedulesToExecute(Date currentTimestamp);
 
-    SnapshotScheduleVO getCurrentSchedule(Long volumeId, Long policyId, 
boolean executing);
+    List<SnapshotScheduleVO> getSchedulesAssignedWithAsyncJob();
 
-    SnapshotScheduleVO findOneByVolume(long volumeId);
+    SnapshotScheduleVO getCurrentSchedule(Long volumeId, Long policyId, 
boolean executing);
 
     SnapshotScheduleVO findOneByVolumePolicy(long volumeId, long policyId);
 
diff --git 
a/engine/schema/src/main/java/com/cloud/storage/dao/SnapshotScheduleDaoImpl.java
 
b/engine/schema/src/main/java/com/cloud/storage/dao/SnapshotScheduleDaoImpl.java
index 925d02dd90b..14669ce1d43 100644
--- 
a/engine/schema/src/main/java/com/cloud/storage/dao/SnapshotScheduleDaoImpl.java
+++ 
b/engine/schema/src/main/java/com/cloud/storage/dao/SnapshotScheduleDaoImpl.java
@@ -32,7 +32,7 @@ import com.cloud.utils.db.SearchCriteria;
 public class SnapshotScheduleDaoImpl extends 
GenericDaoBase<SnapshotScheduleVO, Long> implements SnapshotScheduleDao {
     protected final SearchBuilder<SnapshotScheduleVO> 
executableSchedulesSearch;
     protected final SearchBuilder<SnapshotScheduleVO> 
coincidingSchedulesSearch;
-    private final SearchBuilder<SnapshotScheduleVO> VolumeIdSearch;
+    protected final SearchBuilder<SnapshotScheduleVO> 
schedulesAssignedWithAsyncJob;
     private final SearchBuilder<SnapshotScheduleVO> VolumeIdPolicyIdSearch;
 
     protected SnapshotScheduleDaoImpl() {
@@ -48,36 +48,14 @@ public class SnapshotScheduleDaoImpl extends 
GenericDaoBase<SnapshotScheduleVO,
         coincidingSchedulesSearch.and("asyncJobId", 
coincidingSchedulesSearch.entity().getAsyncJobId(), SearchCriteria.Op.NULL);
         coincidingSchedulesSearch.done();
 
-        VolumeIdSearch = createSearchBuilder();
-        VolumeIdSearch.and("volumeId", VolumeIdSearch.entity().getVolumeId(), 
SearchCriteria.Op.EQ);
-        VolumeIdSearch.done();
-
         VolumeIdPolicyIdSearch = createSearchBuilder();
         VolumeIdPolicyIdSearch.and("volumeId", 
VolumeIdPolicyIdSearch.entity().getVolumeId(), SearchCriteria.Op.EQ);
         VolumeIdPolicyIdSearch.and("policyId", 
VolumeIdPolicyIdSearch.entity().getPolicyId(), SearchCriteria.Op.EQ);
         VolumeIdPolicyIdSearch.done();
 
-    }
-
-    /**
-     * {@inheritDoc}
-     */
-    @Override
-    public List<SnapshotScheduleVO> getCoincidingSnapshotSchedules(long 
volumeId, Date date) {
-        SearchCriteria<SnapshotScheduleVO> sc = 
coincidingSchedulesSearch.create();
-        sc.setParameters("volumeId", volumeId);
-        sc.setParameters("scheduledTimestamp", date);
-        // Don't return manual snapshots. They will be executed through another
-        // code path.
-        sc.addAnd("policyId", SearchCriteria.Op.NEQ, 1L);
-        return listBy(sc);
-    }
-
-    @Override
-    public SnapshotScheduleVO findOneByVolume(long volumeId) {
-        SearchCriteria<SnapshotScheduleVO> sc = VolumeIdSearch.create();
-        sc.setParameters("volumeId", volumeId);
-        return findOneBy(sc);
+        schedulesAssignedWithAsyncJob = createSearchBuilder();
+        schedulesAssignedWithAsyncJob.and("asyncJobId", 
schedulesAssignedWithAsyncJob.entity().getAsyncJobId(), 
SearchCriteria.Op.NNULL);
+        schedulesAssignedWithAsyncJob.done();
     }
 
     @Override
@@ -98,6 +76,11 @@ public class SnapshotScheduleDaoImpl extends 
GenericDaoBase<SnapshotScheduleVO,
         return listBy(sc);
     }
 
+    @Override
+    public List<SnapshotScheduleVO> getSchedulesAssignedWithAsyncJob() {
+        return listBy(schedulesAssignedWithAsyncJob.create());
+    }
+
     /**
      * {@inheritDoc}
      */
diff --git 
a/engine/schema/src/main/resources/META-INF/db/schema-41910to42000.sql 
b/engine/schema/src/main/resources/META-INF/db/schema-41910to42000.sql
index 2bcf41a4042..f7c78670ddd 100644
--- a/engine/schema/src/main/resources/META-INF/db/schema-41910to42000.sql
+++ b/engine/schema/src/main/resources/META-INF/db/schema-41910to42000.sql
@@ -90,6 +90,9 @@ CREATE TABLE IF NOT EXISTS 
`cloud_usage`.`quota_email_configuration`(
     CONSTRAINT `FK_quota_email_configuration_account_id` FOREIGN KEY 
(`account_id`) REFERENCES `cloud_usage`.`quota_account`(`account_id`),
     CONSTRAINT `FK_quota_email_configuration_email_template_id` FOREIGN KEY 
(`email_template_id`) REFERENCES `cloud_usage`.`quota_email_templates`(`id`));
 
+-- Remove on delete cascade from snapshot schedule
+ALTER TABLE `cloud`.`snapshot_schedule` DROP CONSTRAINT 
`fk__snapshot_schedule_async_job_id`;
+
 -- Add `is_implicit` column to `host_tags` table
 CALL `cloud`.`IDEMPOTENT_ADD_COLUMN`('cloud.host_tags', 'is_implicit', 'int(1) 
UNSIGNED NOT NULL DEFAULT 0 COMMENT "If host tag is implicit or explicit" ');
 
diff --git 
a/server/src/main/java/com/cloud/storage/snapshot/SnapshotSchedulerImpl.java 
b/server/src/main/java/com/cloud/storage/snapshot/SnapshotSchedulerImpl.java
index 29955066062..2a53021636c 100644
--- a/server/src/main/java/com/cloud/storage/snapshot/SnapshotSchedulerImpl.java
+++ b/server/src/main/java/com/cloud/storage/snapshot/SnapshotSchedulerImpl.java
@@ -35,6 +35,7 @@ import 
org.apache.cloudstack.framework.jobs.AsyncJobDispatcher;
 import org.apache.cloudstack.framework.jobs.AsyncJobManager;
 import org.apache.cloudstack.framework.jobs.dao.AsyncJobDao;
 import org.apache.cloudstack.framework.jobs.impl.AsyncJobVO;
+import org.apache.cloudstack.jobs.JobInfo;
 import org.apache.cloudstack.managed.context.ManagedContextTimerTask;
 import org.springframework.stereotype.Component;
 
@@ -47,7 +48,6 @@ import com.cloud.server.TaggedResourceService;
 import com.cloud.storage.Snapshot;
 import com.cloud.storage.SnapshotPolicyVO;
 import com.cloud.storage.SnapshotScheduleVO;
-import com.cloud.storage.SnapshotVO;
 import com.cloud.storage.VolumeVO;
 import com.cloud.storage.dao.SnapshotDao;
 import com.cloud.storage.dao.SnapshotPolicyDao;
@@ -64,7 +64,6 @@ import com.cloud.utils.component.ManagerBase;
 import com.cloud.utils.concurrency.TestClock;
 import com.cloud.utils.db.DB;
 import com.cloud.utils.db.GlobalLock;
-import com.cloud.utils.db.SearchCriteria;
 import com.cloud.utils.db.TransactionLegacy;
 import com.cloud.vm.snapshot.VMSnapshotManager;
 import com.cloud.vm.snapshot.VMSnapshotVO;
@@ -144,7 +143,7 @@ public class SnapshotSchedulerImpl extends ManagerBase 
implements SnapshotSchedu
         try {
             if (scanLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_COOPERATION)) {
                 try {
-                    checkStatusOfCurrentlyExecutingSnapshots();
+                    scheduleNextSnapshotJobsIfNecessary();
                 } finally {
                     scanLock.unlock();
                 }
@@ -174,68 +173,37 @@ public class SnapshotSchedulerImpl extends ManagerBase 
implements SnapshotSchedu
         }
     }
 
-    private void checkStatusOfCurrentlyExecutingSnapshots() {
-        final SearchCriteria<SnapshotScheduleVO> sc = 
_snapshotScheduleDao.createSearchCriteria();
-        sc.addAnd("asyncJobId", SearchCriteria.Op.NNULL);
-        final List<SnapshotScheduleVO> snapshotSchedules = 
_snapshotScheduleDao.search(sc, null);
-        for (final SnapshotScheduleVO snapshotSchedule : snapshotSchedules) {
-            final Long asyncJobId = snapshotSchedule.getAsyncJobId();
-            final AsyncJobVO asyncJob = 
_asyncJobDao.findByIdIncludingRemoved(asyncJobId);
-            switch (asyncJob.getStatus()) {
-                case SUCCEEDED:
-                    // The snapshot has been successfully backed up.
-                    // The snapshot state has also been cleaned up.
-                    // We can schedule the next job for this snapshot.
-                    // Remove the existing entry in the snapshot_schedule 
table.
-                    scheduleNextSnapshotJob(snapshotSchedule);
-                    break;
-                case FAILED:
-                    // Check the snapshot status.
-                    final Long snapshotId = snapshotSchedule.getSnapshotId();
-                    if (snapshotId == null) {
-                        // createSnapshotAsync exited, successfully or 
unsuccessfully,
-                        // even before creating a snapshot record
-                        // No cleanup needs to be done.
-                        // Schedule the next snapshot.
-                        scheduleNextSnapshotJob(snapshotSchedule);
-                    } else {
-                        final SnapshotVO snapshot = 
_snapshotDao.findById(snapshotId);
-                        if (snapshot == null || snapshot.getRemoved() != null) 
{
-                            // This snapshot has been deleted successfully 
from the primary storage
-                            // Again no cleanup needs to be done.
-                            // Schedule the next snapshot.
-                            // There's very little probability that the code 
reaches this point.
-                            // The snapshotId is a foreign key for the 
snapshot_schedule table
-                            // set to ON DELETE CASCADE. So if the snapshot 
entry is deleted, the snapshot_schedule entry will be too.
-                            // But what if it has only been marked as removed?
-                            scheduleNextSnapshotJob(snapshotSchedule);
-                        } else {
-                            // The management server executing this snapshot 
job appears to have crashed
-                            // while creating the snapshot on primary 
storage/or backing it up.
-                            // We have no idea whether the snapshot was 
successfully taken on the primary or not.
-                            // Schedule the next snapshot job.
-                            // The ValidatePreviousSnapshotCommand will take 
appropriate action on this snapshot
-                            // If the snapshot was taken successfully on 
primary, it will retry backing it up.
-                            // and cleanup the previous snapshot
-                            // Set the userId to that of system.
-                            //_snapshotManager.validateSnapshot(1L, snapshot);
-                            // In all cases, schedule the next snapshot job
-                            scheduleNextSnapshotJob(snapshotSchedule);
-                        }
-                    }
+    private void scheduleNextSnapshotJobsIfNecessary() {
+        List<SnapshotScheduleVO> snapshotSchedules = 
_snapshotScheduleDao.getSchedulesAssignedWithAsyncJob();
+        logger.info("Verifying the current state of [{}] snapshot schedules 
and scheduling next jobs, if necessary.", snapshotSchedules.size());
+        for (SnapshotScheduleVO snapshotSchedule : snapshotSchedules) {
+            scheduleNextSnapshotJobIfNecessary(snapshotSchedule);
+        }
+    }
 
-                    break;
-                case IN_PROGRESS:
-                    // There is no way of knowing from here whether
-                    // 1) Another management server is processing this 
snapshot job
-                    // 2) The management server has crashed and this snapshot 
is lying
-                    // around in an inconsistent state.
-                    // Hopefully, this can be resolved at the backend when the 
current snapshot gets executed.
-                    // But if it remains in this state, the current snapshot 
will not get executed.
-                    // And it will remain in stasis.
-                    break;
-            }
+    protected void scheduleNextSnapshotJobIfNecessary(SnapshotScheduleVO 
snapshotSchedule) {
+        Long asyncJobId = snapshotSchedule.getAsyncJobId();
+        AsyncJobVO asyncJob = 
_asyncJobDao.findByIdIncludingRemoved(asyncJobId);
+
+        if (asyncJob == null) {
+            logger.debug("The async job [{}] of snapshot schedule [{}] does 
not exist anymore. Considering it as finished and scheduling the next snapshot 
job.",
+                    asyncJobId, snapshotSchedule);
+            scheduleNextSnapshotJob(snapshotSchedule);
+            return;
         }
+
+        JobInfo.Status status = asyncJob.getStatus();
+
+        if (JobInfo.Status.SUCCEEDED.equals(status)) {
+            logger.debug("Last job of schedule [{}] succeeded; scheduling the 
next snapshot job.", snapshotSchedule);
+        } else if (JobInfo.Status.FAILED.equals(status)) {
+            logger.debug("Last job of schedule [{}] failed with [{}]; 
scheduling a new snapshot job.", snapshotSchedule, asyncJob.getResult());
+        } else {
+            logger.debug("Schedule [{}] is still in progress, skipping next 
job scheduling.", snapshotSchedule);
+            return;
+        }
+
+        scheduleNextSnapshotJob(snapshotSchedule);
     }
 
     @DB
diff --git 
a/server/src/test/java/com/cloud/storage/snapshot/SnapshotSchedulerImplTest.java
 
b/server/src/test/java/com/cloud/storage/snapshot/SnapshotSchedulerImplTest.java
index 971af289ef7..3827531891f 100644
--- 
a/server/src/test/java/com/cloud/storage/snapshot/SnapshotSchedulerImplTest.java
+++ 
b/server/src/test/java/com/cloud/storage/snapshot/SnapshotSchedulerImplTest.java
@@ -26,6 +26,9 @@ import com.cloud.storage.dao.VolumeDao;
 import com.cloud.user.Account;
 import com.cloud.user.AccountVO;
 import com.cloud.user.dao.AccountDao;
+import org.apache.cloudstack.framework.jobs.dao.AsyncJobDao;
+import org.apache.cloudstack.framework.jobs.impl.AsyncJobVO;
+import org.apache.cloudstack.jobs.JobInfo;
 import org.junit.Assert;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -65,6 +68,16 @@ public class SnapshotSchedulerImplTest {
     @Mock
     AccountVO accountVoMock;
 
+    @Mock
+    private SnapshotScheduleVO snapshotScheduleVoMock;
+
+    @Mock
+    private AsyncJobDao asyncJobDaoMock;
+
+    @Mock
+    private AsyncJobVO asyncJobVoMock;
+
+
     @Test
     public void scheduleNextSnapshotJobTestParameterIsNullReturnNull() {
         SnapshotScheduleVO snapshotScheduleVO = null;
@@ -215,4 +228,50 @@ public class SnapshotSchedulerImplTest {
 
         Mockito.verify(snapshotScheduleDaoMock, 
Mockito.never()).remove(Mockito.anyLong());
     }
+
+    @Test
+    public void 
scheduleNextSnapshotJobIfNecessaryTestAsyncJobIsNullThenScheduleNextSnapshot() {
+        Mockito.doReturn(1L).when(snapshotScheduleVoMock).getAsyncJobId();
+        
Mockito.doReturn(null).when(asyncJobDaoMock).findByIdIncludingRemoved(Mockito.any());
+        Mockito.doReturn(new 
Date()).when(snapshotSchedulerImplSpy).scheduleNextSnapshotJob(Mockito.any(SnapshotScheduleVO.class));
+
+        
snapshotSchedulerImplSpy.scheduleNextSnapshotJobIfNecessary(snapshotScheduleVoMock);
+
+        
Mockito.verify(snapshotSchedulerImplSpy).scheduleNextSnapshotJob(Mockito.any(SnapshotScheduleVO.class));
+    }
+
+    @Test
+    public void 
scheduleNextSnapshotJobIfNecessaryTestAsyncJobSucceededThenScheduleNextSnapshot()
 {
+        Mockito.doReturn(1L).when(snapshotScheduleVoMock).getAsyncJobId();
+        
Mockito.doReturn(asyncJobVoMock).when(asyncJobDaoMock).findByIdIncludingRemoved(Mockito.any());
+        
Mockito.doReturn(JobInfo.Status.SUCCEEDED).when(asyncJobVoMock).getStatus();
+        Mockito.doReturn(new 
Date()).when(snapshotSchedulerImplSpy).scheduleNextSnapshotJob(Mockito.any(SnapshotScheduleVO.class));
+
+        
snapshotSchedulerImplSpy.scheduleNextSnapshotJobIfNecessary(snapshotScheduleVoMock);
+
+        
Mockito.verify(snapshotSchedulerImplSpy).scheduleNextSnapshotJob(Mockito.any(SnapshotScheduleVO.class));
+    }
+
+    @Test
+    public void 
scheduleNextSnapshotJobIfNecessaryTestAsyncJobFailedThenScheduleNextSnapshot() {
+        Mockito.doReturn(1L).when(snapshotScheduleVoMock).getAsyncJobId();
+        
Mockito.doReturn(asyncJobVoMock).when(asyncJobDaoMock).findByIdIncludingRemoved(Mockito.any());
+        
Mockito.doReturn(JobInfo.Status.FAILED).when(asyncJobVoMock).getStatus();
+        Mockito.doReturn(new 
Date()).when(snapshotSchedulerImplSpy).scheduleNextSnapshotJob(Mockito.any(SnapshotScheduleVO.class));
+
+        
snapshotSchedulerImplSpy.scheduleNextSnapshotJobIfNecessary(snapshotScheduleVoMock);
+
+        
Mockito.verify(snapshotSchedulerImplSpy).scheduleNextSnapshotJob(Mockito.any(SnapshotScheduleVO.class));
+    }
+
+    @Test
+    public void 
scheduleNextSnapshotJobIfNecessaryTestAsyncJobInProgressThenDoNothing() {
+        Mockito.doReturn(1L).when(snapshotScheduleVoMock).getAsyncJobId();
+        
Mockito.doReturn(asyncJobVoMock).when(asyncJobDaoMock).findByIdIncludingRemoved(Mockito.any());
+        
Mockito.doReturn(JobInfo.Status.IN_PROGRESS).when(asyncJobVoMock).getStatus();
+
+        
snapshotSchedulerImplSpy.scheduleNextSnapshotJobIfNecessary(snapshotScheduleVoMock);
+
+        Mockito.verify(snapshotSchedulerImplSpy, 
Mockito.never()).scheduleNextSnapshotJob(Mockito.any(SnapshotScheduleVO.class));
+    }
 }

Reply via email to