mooli tayer has uploaded a new change for review.

Change subject: backend: bugfix in VdsManager on failedToRunVm
......................................................................

backend: bugfix in VdsManager on failedToRunVm

Bug description: onVdsDuringFailureTimer assumes that the number
of attempts is always >= 0, but this assumption is not always
true. The reason for this is that pausing a quartz job does not
ensure it will not run anymore but simply pauses the trigger,
so previously scheduled jobs might still run. This is resolved
by using simpler one-time scheduling.

This patch introduces a behavioral change:
Previously onVdsDuringFailureTimer would reduce the failed attempts
count by 1 (which is problematic, since it no longer represents failed
attempts when it is decremented by 1 every x amount of time). Previously
there would be a competition between onVdsDuringFailureTimer calls
and failedToRunVm calls. Now, whenever the attempt limit is reached,
the number of attempts is set to 0. The effect of this is that if we have
a host failing to run vms, and those vms were able to run on other hosts,
and we detect no other problem on it (its status is up),
it will keep trying to recover every 30 min (default) regardless.
The new behavior is consistent and easy to understand.

Change-Id: Ia4dd140ceecf4954e65ea3f6174a41acea82f6a6
Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1066693
Signed-off-by: Mooli Tayer <[email protected]>
---
M 
backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
1 file changed, 15 insertions(+), 23 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/60/26460/1

diff --git 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
index b289fc9..63b0523 100644
--- 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
+++ 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
@@ -99,7 +99,6 @@
 
     private int VDS_DURING_FAILURE_TIMEOUT_IN_MINUTES = Config
             .<Integer> 
getValue(ConfigValues.TimeToReduceFailedRunOnVdsInMinutes);
-    private String duringFailureJobId;
     private boolean privateInitialized;
 
     public boolean getInitialized() {
@@ -171,10 +170,6 @@
 
     public void schedulJobs() {
         SchedulerUtil sched = SchedulerUtilQuartzImpl.getInstance();
-        duringFailureJobId = sched.scheduleAFixedDelayJob(this, 
"onVdsDuringFailureTimer", new Class[0],
-                    new Object[0], VDS_DURING_FAILURE_TIMEOUT_IN_MINUTES, 
VDS_DURING_FAILURE_TIMEOUT_IN_MINUTES,
-                    TimeUnit.MINUTES);
-        sched.pauseJob(duringFailureJobId);
         // start with refresh statistics
         _refreshIteration = _numberRefreshesBeforeSave - 1;
 
@@ -480,24 +475,18 @@
      */
     @OnTimerMethodAnnotation("onVdsDuringFailureTimer")
     public void onVdsDuringFailureTimer() {
-        synchronized (getLockObj()) {
-            VDS vds = DbFacade.getInstance().getVdsDao().get(getVdsId());
-            /**
-             * Disable timer if vds returns from suspicious mode
-             */
-            if (mFailedToRunVmAttempts.decrementAndGet() == 0) {
-                
SchedulerUtilQuartzImpl.getInstance().pauseJob(duringFailureJobId);
-            }
-            /**
-             * Move vds to Up status from error
-             */
-            if (mFailedToRunVmAttempts.get() < Config.<Integer> 
getValue(ConfigValues.NumberOfFailedRunsOnVds)
-                    && vds.getStatus() == VDSStatus.Error) {
-                setStatus(VDSStatus.Up, vds);
-                
DbFacade.getInstance().getVdsDynamicDao().updateStatus(getVdsId(), 
VDSStatus.Up);
-            }
-            log.infoFormat("onVdsDuringFailureTimer of Host {0} entered after 
{1} attempts to run a VM", vds.getName(),
+        VDS vds = DbFacade.getInstance().getVdsDao().get(getVdsId());
+
+        /**
+         * Move vds to Up status from error
+         */
+        if (vds.getStatus() == VDSStatus.Error) {
+            setStatus(VDSStatus.Up, vds);
+            DbFacade.getInstance().getVdsDynamicDao().updateStatus(getVdsId(), 
VDSStatus.Up);
+            log.infoFormat("onVdsDuringFailureTimer of Host {0} entered after 
{1} attempts to run a VM",
+                    vds.getName(),
                     mFailedToRunVmAttempts);
+            mFailedToRunVmAttempts.set(0);
         }
     }
 
@@ -516,7 +505,10 @@
             
ResourceManager.getInstance().runVdsCommand(VDSCommandType.SetVdsStatus,
                     new SetVdsStatusVDSCommandParameters(vds.getId(), 
VDSStatus.Error));
 
-            
SchedulerUtilQuartzImpl.getInstance().resumeJob(duringFailureJobId);
+            SchedulerUtil sched = SchedulerUtilQuartzImpl.getInstance();
+            sched.scheduleAOneTimeJob(this, "onVdsDuringFailureTimer", new 
Class[0],
+                    new Object[0], VDS_DURING_FAILURE_TIMEOUT_IN_MINUTES,
+                    TimeUnit.MINUTES);
             AuditLogableBase logable = new AuditLogableBase(vds.getId());
             logable.addCustomValue("Time", Config.<Integer> 
getValue(ConfigValues.TimeToReduceFailedRunOnVdsInMinutes)
                     .toString());


-- 
To view, visit http://gerrit.ovirt.org/26460
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia4dd140ceecf4954e65ea3f6174a41acea82f6a6
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-engine
Gerrit-Branch: ovirt-engine-3.4
Gerrit-Owner: mooli tayer <[email protected]>
_______________________________________________
Engine-patches mailing list
[email protected]
http://lists.ovirt.org/mailman/listinfo/engine-patches

Reply via email to