Item 1: Add the ability to track a job's initial scheduled time, and
an option (MaxRunInitSchedTime) that cancels the job once that much
time has elapsed since it was first scheduled
Origin: Thomas Lohman - thom...@mtl.mit.edu
Date: 12 March 2013
Status: patch attached
What: Hi, it may be possible to do this another way, but currently, if
you need to start a job at time X, reschedule it on error every Y
minutes, and also make sure that after Z hours the job is canceled no
matter what, there does not appear to be a way to ensure that this
happens. The MaxRunSchedTime option uses the time of the last
rescheduling as its baseline, so if you are using Reschedule On Error,
MaxRunSchedTime doesn't accomplish what we wanted to do. Because of
this, I made the following patch to the 5.2.13 source code (see
attached unified diff file).
Why: We have a set of jobs that we want to definitely end at a known
time - Z hours after they've been initially scheduled - no matter what
state the job is in.
Notes: If there is another way to accomplish what we need to do then
that would be great.
thanks very much,
--tom
--- src/dird/dird_conf.h.orig 2013-02-19 14:21:35.000000000 -0500
+++ src/dird/dird_conf.h 2013-03-04 13:45:16.000000000 -0500
@@ -394,6 +394,7 @@
utime_t IncMaxRunTime; /* Max Incremental job run time */
utime_t MaxStartDelay; /* max start delay in seconds */
utime_t MaxRunSchedTime; /* max run time in seconds from
Scheduled time*/
+ utime_t MaxRunInitSchedTime; /* max run time in seconds from Initial Scheduled time */
utime_t RescheduleInterval; /* Reschedule interval */
utime_t MaxFullInterval; /* Maximum time interval between Fulls */
utime_t MaxDiffInterval; /* Maximum time interval between Diffs */
--- src/dird/jobq.c.orig 2013-02-19 14:21:35.000000000 -0500
+++ src/dird/jobq.c 2013-03-04 13:48:21.000000000 -0500
@@ -681,6 +681,7 @@
set_jcr_defaults(njcr, jcr->job);
njcr->reschedule_count = jcr->reschedule_count;
njcr->sched_time = jcr->sched_time;
+ njcr->initial_sched_time = jcr->initial_sched_time;
/*
* Special test here since a Virtual Full gets marked
* as a Full, so we look at the resource record
--- src/jcr.h.orig 2013-02-19 14:21:35.000000000 -0500
+++ src/jcr.h 2013-03-04 12:47:20.000000000 -0500
@@ -249,6 +249,7 @@
volatile int32_t JobStatus; /* ready, running, blocked, terminated */
int32_t JobPriority; /* Job priority */
time_t sched_time; /* job schedule time, i.e. when it
should start */
+ time_t initial_sched_time; /* original sched time before any reschedules are done */
time_t start_time; /* when job actually started */
time_t run_time; /* used for computing speed */
time_t last_time; /* Last sample time */
@@ -334,6 +335,7 @@
uint32_t MediaId; /* DB record IDs associated with this
job */
uint32_t FileIndex; /* Last FileIndex processed */
utime_t MaxRunSchedTime; /* max run time in seconds from
Scheduled time*/
+ utime_t MaxRunInitSchedTime; /* max run time in seconds from Initial Scheduled time*/
POOLMEM *fname; /* name to put into catalog */
POOLMEM *component_fname; /* Component info file name */
FILE *component_fd; /* Component info file desc */
--- src/dird/job.c.orig 2013-02-19 14:21:35.000000000 -0500
+++ src/dird/job.c 2013-03-04 13:26:14.000000000 -0500
@@ -43,6 +43,7 @@
static bool job_check_maxwaittime(JCR *jcr);
static bool job_check_maxruntime(JCR *jcr);
static bool job_check_maxrunschedtime(JCR *jcr);
+static bool job_check_maxruninitschedtime(JCR *jcr);
/* Imported subroutines */
extern void term_scheduler();
@@ -277,6 +278,11 @@
Jmsg(jcr, M_FATAL, 0, _("Job canceled because max run sched time
exceeded.\n"));
}
+ if (job_check_maxruninitschedtime(jcr)) {
+ jcr->setJobStatus(JS_Canceled);
+ Jmsg(jcr, M_FATAL, 0, _("Job canceled because max run init sched time exceeded.\n"));
+ }
+
/* TODO : check if it is used somewhere */
if (jcr->job->RunScripts == NULL) {
Dmsg0(200, "Warning, job->RunScripts is empty\n");
@@ -557,6 +563,11 @@
jcr->setJobStatus(JS_Canceled);
Qmsg(jcr, M_FATAL, 0, _("Max run sched time exceeded. Job
canceled.\n"));
cancel = true;
+ /* check MaxRunInitSchedTime */
+ } else if (job_check_maxruninitschedtime(jcr)) {
+ jcr->setJobStatus(JS_Canceled);
+ Qmsg(jcr, M_FATAL, 0, _("Max run init sched time exceeded. Job canceled.\n"));
+ cancel = true;
}
if (cancel) {
@@ -662,6 +673,24 @@
}
/*
+ * Check if MaxRunInitSchedTime has expired and if the job can be
+ * canceled.
+ */
+static bool job_check_maxruninitschedtime(JCR *jcr)
+{
+ if (jcr->MaxRunInitSchedTime == 0 || job_canceled(jcr)) {
+ return false;
+ }
+ if ((watchdog_time - jcr->initial_sched_time) < jcr->MaxRunInitSchedTime) {
+ Dmsg3(200, "Job %p (%s) with MaxRunInitSchedTime %d not expired\n",
+ jcr, jcr->Job, jcr->MaxRunInitSchedTime);
+ return false;
+ }
+
+ return true;
+}
+
+/*
* Get or create a Pool record with the given name.
* Returns: 0 on error
* poolid if OK
@@ -1196,6 +1225,7 @@
jcr->write_part_after_job = job->write_part_after_job;
jcr->IgnoreDuplicateJobChecking = job->IgnoreDuplicateJobChecking;
jcr->MaxRunSchedTime = job->MaxRunSchedTime;
+ jcr->MaxRunInitSchedTime = job->MaxRunInitSchedTime;
if (jcr->RestoreBootstrap) {
free(jcr->RestoreBootstrap);
jcr->RestoreBootstrap = NULL;
--- src/lib/jcr.c.orig 2013-02-19 14:21:35.000000000 -0500
+++ src/lib/jcr.c 2013-03-04 13:09:32.000000000 -0500
@@ -352,6 +352,7 @@
}
jcr->job_end_push.init(1, false);
jcr->sched_time = time(NULL);
+ jcr->initial_sched_time = jcr->sched_time;
jcr->daemon_free_jcr = daemon_free_jcr; /* plug daemon free routine */
jcr->init_mutex();
jcr->inc_use_count();
--- src/dird/dird_conf.c.orig 2013-02-19 14:21:35.000000000 -0500
+++ src/dird/dird_conf.c 2013-03-04 13:42:12.000000000 -0500
@@ -297,6 +297,7 @@
{"writeverifylist",store_dir,ITEM(res_job.WriteVerifyList), 0, 0, 0},
{"replace", store_replace, ITEM(res_job.replace), 0, ITEM_DEFAULT,
REPLACE_ALWAYS},
{"maxrunschedtime", store_time, ITEM(res_job.MaxRunSchedTime), 0, 0, 0},
+ {"maxruninitschedtime", store_time, ITEM(res_job.MaxRunInitSchedTime), 0, 0, 0},
{"maxruntime", store_time, ITEM(res_job.MaxRunTime), 0, 0, 0},
/* xxxMaxWaitTime are deprecated */
{"fullmaxwaittime", store_time, ITEM(res_job.FullMaxRunTime), 0, 0, 0},
@@ -724,6 +725,10 @@
if (res->res_job.MaxRunSchedTime) {
sendit(sock, _(" --> MaxRunSchedTime=%u\n"),
res->res_job.MaxRunSchedTime);
}
+ if (res->res_job.MaxRunInitSchedTime) {
+ sendit(sock, _(" --> MaxRunInitSchedTime=%u\n"), res->res_job.MaxRunInitSchedTime);
+ }
+
if (res->res_job.storage) {
STORE *store;
foreach_alist(store, res->res_job.storage) {
------------------------------------------------------------------------------
Everyone hates slow websites. So do we.
Make your web apps faster with AppDynamics
Download AppDynamics Lite for free today:
http://p.sf.net/sfu/appdyn_d2d_mar
_______________________________________________
Bacula-devel mailing list
Bacula-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/bacula-devel