- Can we write the state file after every job terminates? On Win32
the system crashes and the state file is not updated.
Not just on Win32 :-) I have a similar problem sometimes, because I
don't live in a large town and I don't have a UPS.
I've changed the lib code and the three daemons, so that each state
file gets updated at the end of every job.
What I had to do was:
1) Move the code which fills in last_job (and appends it to the
last_jobs list) within lib/jcr.c. This code was originally in
free_common_jcr, and I moved it to free_jcr, so that it would be
executed earlier in the process. last_job needs to be updated
before the call to jcr->daemon_free_jcr(jcr); -- see 2) below.
[free_common_jcr is only ever called from free_jcr, so no other
caller is affected by the move]
2) Add a call to write_state_file() to each of the daemons' own
free_jcr functions (dird_free_jcr, stored_free_jcr, filed_free_jcr)
so that the daemon will update its state file immediately after
free_jcr has updated last_job.
I've used this patch on 2.0.2 for several months, and on 2.2.0 for
a week or two now, with no problems. I've attached the patch (diff -u)
as four separate xxx.c.patch files.
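However, it's not perfect - the state file is updated at the end of
*every* job, including console sessions (which are handled as jobs
with JobId 0). This means that exiting bat, bconsole etc. causes the
state file to be written, even though no real job has run.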
Allan
--- src/dird/job.c.orig Mon Jul 16 13:54:42 2007
+++ src/dird/job.c Wed Aug 15 10:59:43 2007
@@ -843,6 +843,9 @@
free_rwstorage(jcr);
jcr->job_end_push.destroy();
+
+ write_state_file(director->working_directory, "bacula-dir", get_first_port_host_order(director->DIRaddrs));
+
Dmsg0(200, "End dird free_jcr\n");
}
--- src/filed/job.c.orig Sun Jul 22 13:21:06 2007
+++ src/filed/job.c Wed Aug 15 11:03:17 2007
@@ -1779,6 +1779,8 @@
free_runscripts(jcr->RunScripts);
delete jcr->RunScripts;
+ write_state_file(me->working_directory, "bacula-fd", get_first_port_host_order(me->FDaddrs));
+
return;
}
--- src/lib/jcr.c.orig Sun Jul 22 13:21:06 2007
+++ src/lib/jcr.c Wed Aug 15 10:48:04 2007
@@ -306,47 +306,6 @@
*/
static void free_common_jcr(JCR *jcr)
{
- struct s_last_job *je, last_job;
-
- /* Keep some statistics */
- switch (jcr->JobType) {
- case JT_BACKUP:
- case JT_VERIFY:
- case JT_RESTORE:
- case JT_MIGRATE:
- case JT_COPY:
- case JT_ADMIN:
- num_jobs_run++;
- last_job.Errors = jcr->Errors;
- last_job.JobType = jcr->JobType;
- last_job.JobId = jcr->JobId;
- last_job.VolSessionId = jcr->VolSessionId;
- last_job.VolSessionTime = jcr->VolSessionTime;
- bstrncpy(last_job.Job, jcr->Job, sizeof(last_job.Job));
- last_job.JobFiles = jcr->JobFiles;
- last_job.JobBytes = jcr->JobBytes;
- last_job.JobStatus = jcr->JobStatus;
- last_job.JobLevel = jcr->JobLevel;
- last_job.start_time = jcr->start_time;
- last_job.end_time = time(NULL);
- /* Keep list of last jobs, but not Console where JobId==0 */
- if (last_job.JobId > 0) {
- je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
- memcpy((char *)je, (char *)&last_job, sizeof(last_job));
- if (!last_jobs) {
- init_last_jobs_list();
- }
- last_jobs->append(je);
- if (last_jobs->size() > max_last_jobs) {
- je = (struct s_last_job *)last_jobs->first();
- last_jobs->remove(je);
- free(je);
- }
- }
- break;
- default:
- break;
- }
jcr->destroy_mutex();
if (jcr->msg_queue) {
@@ -412,6 +371,8 @@
#ifdef DEBUG
void b_free_jcr(const char *file, int line, JCR *jcr)
{
+ struct s_last_job *je, last_job;
+
Dmsg3(3400, "Enter free_jcr 0x%x from %s:%d\n", jcr, file, line);
#else
@@ -418,6 +379,7 @@
void free_jcr(JCR *jcr)
{
+ struct s_last_job *je, last_job;
Dmsg2(3400, "Enter free_jcr 0x%x job=%d\n", jcr, jcr->JobId);
@@ -443,6 +405,47 @@
job_end_pop(jcr); /* pop and call hooked routines */
Dmsg1(3400, "End job=%d\n", jcr->JobId);
+
+ /* Keep some statistics */
+ switch (jcr->JobType) {
+ case JT_BACKUP:
+ case JT_VERIFY:
+ case JT_RESTORE:
+ case JT_MIGRATE:
+ case JT_COPY:
+ case JT_ADMIN:
+ num_jobs_run++;
+ last_job.Errors = jcr->Errors;
+ last_job.JobType = jcr->JobType;
+ last_job.JobId = jcr->JobId;
+ last_job.VolSessionId = jcr->VolSessionId;
+ last_job.VolSessionTime = jcr->VolSessionTime;
+ bstrncpy(last_job.Job, jcr->Job, sizeof(last_job.Job));
+ last_job.JobFiles = jcr->JobFiles;
+ last_job.JobBytes = jcr->JobBytes;
+ last_job.JobStatus = jcr->JobStatus;
+ last_job.JobLevel = jcr->JobLevel;
+ last_job.start_time = jcr->start_time;
+ last_job.end_time = time(NULL);
+ /* Keep list of last jobs, but not Console where JobId==0 */
+ if (last_job.JobId > 0) {
+ je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
+ memcpy((char *)je, (char *)&last_job, sizeof(last_job));
+ if (!last_jobs) {
+ init_last_jobs_list();
+ }
+ last_jobs->append(je);
+ if (last_jobs->size() > max_last_jobs) {
+ je = (struct s_last_job *)last_jobs->first();
+ last_jobs->remove(je);
+ free(je);
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
if (jcr->daemon_free_jcr) {
jcr->daemon_free_jcr(jcr); /* call daemon free routine */
}
--- src/stored/job.c.orig Thu Aug 9 10:52:28 2007
+++ src/stored/job.c Wed Aug 15 11:01:55 2007
@@ -388,5 +388,8 @@
jcr->write_store = NULL;
}
Dsm_check(1);
+
+ write_state_file(me->working_directory, "bacula-sd", get_first_port_host_order(me->sdaddrs));
+
return;
}
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
_______________________________________________
Bacula-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/bacula-devel