Item 1: Add the ability to track the initial scheduled time for a job, and an option to allow a job to run only for MaxRunInitSchedTime after that initial scheduled time before being canceled

Origin: Thomas Lohman - thom...@mtl.mit.edu

Date:   12 March 2013

Status: patch attached

What: Hi, it may be possible to do this another way, but currently, if you need to start a job at time X, reschedule it on error every Y minutes, and also make sure that the job is canceled after Z hours no matter what, there doesn't appear to be a way to ensure that this happens. The MaxRunSchedTime option uses the time of the last rescheduling as its baseline, so if you are using Reschedule On Error, MaxRunSchedTime doesn't accomplish what we want. Because of this, I made the following patch to the 5.2.13 source code (see attached unified diff file).

Why: We have a set of jobs that we want to definitely end at a known time - Z hours after they've been initially scheduled - no matter what state the job is in.

Notes: If there is another way to accomplish what we need to do then that would be great.

thanks very much,


--tom
--- src/dird/dird_conf.h.orig   2013-02-19 14:21:35.000000000 -0500
+++ src/dird/dird_conf.h        2013-03-04 13:45:16.000000000 -0500
@@ -394,6 +394,7 @@
    utime_t IncMaxRunTime;             /* Max Incremental job run time */
    utime_t MaxStartDelay;             /* max start delay in seconds */
    utime_t MaxRunSchedTime;           /* max run time in seconds from 
Scheduled time*/
+   utime_t MaxRunInitSchedTime;       /* max run time in seconds from Initial 
Scheduled time */
    utime_t RescheduleInterval;        /* Reschedule interval */
    utime_t MaxFullInterval;           /* Maximum time interval between Fulls */
    utime_t MaxDiffInterval;           /* Maximum time interval between Diffs */
--- src/dird/jobq.c.orig        2013-02-19 14:21:35.000000000 -0500
+++ src/dird/jobq.c     2013-03-04 13:48:21.000000000 -0500
@@ -681,6 +681,7 @@
       set_jcr_defaults(njcr, jcr->job);
       njcr->reschedule_count = jcr->reschedule_count;
       njcr->sched_time = jcr->sched_time;
+      njcr->initial_sched_time = jcr->initial_sched_time;
       /*
        * Special test here since a Virtual Full gets marked
        *  as a Full, so we look at the resource record
--- src/jcr.h.orig      2013-02-19 14:21:35.000000000 -0500
+++ src/jcr.h   2013-03-04 12:47:20.000000000 -0500
@@ -249,6 +249,7 @@
    volatile int32_t JobStatus;        /* ready, running, blocked, terminated */
    int32_t JobPriority;               /* Job priority */
    time_t sched_time;                 /* job schedule time, i.e. when it 
should start */
+   time_t initial_sched_time;         /* original sched time before any 
reschedules are done */   
    time_t start_time;                 /* when job actually started */
    time_t run_time;                   /* used for computing speed */
    time_t last_time;                  /* Last sample time */
@@ -334,6 +335,7 @@
    uint32_t MediaId;                  /* DB record IDs associated with this 
job */
    uint32_t FileIndex;                /* Last FileIndex processed */
    utime_t MaxRunSchedTime;           /* max run time in seconds from 
Scheduled time*/
+   utime_t MaxRunInitSchedTime;       /* max run time in seconds from Initial 
Scheduled time*/
    POOLMEM *fname;                    /* name to put into catalog */
    POOLMEM *component_fname;          /* Component info file name */
    FILE *component_fd;                /* Component info file desc */
--- src/dird/job.c.orig 2013-02-19 14:21:35.000000000 -0500
+++ src/dird/job.c      2013-03-04 13:26:14.000000000 -0500
@@ -43,6 +43,7 @@
 static bool job_check_maxwaittime(JCR *jcr);
 static bool job_check_maxruntime(JCR *jcr);
 static bool job_check_maxrunschedtime(JCR *jcr);
+static bool job_check_maxruninitschedtime(JCR *jcr);
 
 /* Imported subroutines */
 extern void term_scheduler();
@@ -277,6 +278,11 @@
       Jmsg(jcr, M_FATAL, 0, _("Job canceled because max run sched time 
exceeded.\n"));
    }
 
+   if (job_check_maxruninitschedtime(jcr)) {
+      jcr->setJobStatus(JS_Canceled);
+      Jmsg(jcr, M_FATAL, 0, _("Job canceled because max run init sched time 
exceeded.\n"));
+   }
+
    /* TODO : check if it is used somewhere */
    if (jcr->job->RunScripts == NULL) {
       Dmsg0(200, "Warning, job->RunScripts is empty\n");
@@ -557,6 +563,11 @@
          jcr->setJobStatus(JS_Canceled);
          Qmsg(jcr, M_FATAL, 0, _("Max run sched time exceeded. Job 
canceled.\n"));
          cancel = true;
+      /* check MaxRunInitSchedTime */ 
+      } else if (job_check_maxruninitschedtime(jcr)) {
+        jcr->setJobStatus(JS_Canceled);
+        Qmsg(jcr, M_FATAL, 0, _("Max run init sched time exceeded. Job 
canceled.\n"));
+        cancel = true;
       }
 
       if (cancel) {
@@ -662,6 +673,24 @@
 }
 
 /*
+ * Check if MaxRunInitSchedTime has expired and if the job can be
+ *   canceled.
+ */
+static bool job_check_maxruninitschedtime(JCR *jcr)
+{
+   if (jcr->MaxRunInitSchedTime == 0 || job_canceled(jcr)) {
+      return false;
+   }
+   if ((watchdog_time - jcr->initial_sched_time) < jcr->MaxRunInitSchedTime) {
+      Dmsg3(200, "Job %p (%s) with MaxRunInitSchedTime %d not expired\n",
+           jcr, jcr->Job, jcr->MaxRunInitSchedTime);
+      return false;
+   }
+
+   return true;
+}
+
+/*
  * Get or create a Pool record with the given name.
  * Returns: 0 on error
  *          poolid if OK
@@ -1196,6 +1225,7 @@
    jcr->write_part_after_job = job->write_part_after_job;
    jcr->IgnoreDuplicateJobChecking = job->IgnoreDuplicateJobChecking;
    jcr->MaxRunSchedTime = job->MaxRunSchedTime;
+   jcr->MaxRunInitSchedTime = job->MaxRunInitSchedTime;
    if (jcr->RestoreBootstrap) {
       free(jcr->RestoreBootstrap);
       jcr->RestoreBootstrap = NULL;
--- src/lib/jcr.c.orig  2013-02-19 14:21:35.000000000 -0500
+++ src/lib/jcr.c       2013-03-04 13:09:32.000000000 -0500
@@ -352,6 +352,7 @@
    }
    jcr->job_end_push.init(1, false);
    jcr->sched_time = time(NULL);
+   jcr->initial_sched_time = jcr->sched_time;
    jcr->daemon_free_jcr = daemon_free_jcr;    /* plug daemon free routine */
    jcr->init_mutex();
    jcr->inc_use_count();   
--- src/dird/dird_conf.c.orig   2013-02-19 14:21:35.000000000 -0500
+++ src/dird/dird_conf.c        2013-03-04 13:42:12.000000000 -0500
@@ -297,6 +297,7 @@
    {"writeverifylist",store_dir,ITEM(res_job.WriteVerifyList), 0, 0, 0},
    {"replace",  store_replace,  ITEM(res_job.replace), 0, ITEM_DEFAULT, 
REPLACE_ALWAYS},
    {"maxrunschedtime", store_time, ITEM(res_job.MaxRunSchedTime), 0, 0, 0},
+   {"maxruninitschedtime", store_time, ITEM(res_job.MaxRunInitSchedTime), 0, 
0, 0},
    {"maxruntime",   store_time, ITEM(res_job.MaxRunTime), 0, 0, 0},
    /* xxxMaxWaitTime are deprecated */
    {"fullmaxwaittime",  store_time, ITEM(res_job.FullMaxRunTime), 0, 0, 0},
@@ -724,6 +725,10 @@
       if (res->res_job.MaxRunSchedTime) {
          sendit(sock, _("  --> MaxRunSchedTime=%u\n"), 
res->res_job.MaxRunSchedTime);
       }
+      if (res->res_job.MaxRunInitSchedTime) {
+        sendit(sock, _("  --> MaxRunInitSchedTime=%u\n"), 
res->res_job.MaxRunInitSchedTime);
+      }
+
       if (res->res_job.storage) {
          STORE *store;
          foreach_alist(store, res->res_job.storage) {
------------------------------------------------------------------------------
Everyone hates slow websites. So do we.
Make your web apps faster with AppDynamics
Download AppDynamics Lite for free today:
http://p.sf.net/sfu/appdyn_d2d_mar
_______________________________________________
Bacula-devel mailing list
Bacula-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/bacula-devel

Reply via email to