Re: What causes this error?

2010-11-16 Thread Robert Heller
At Mon, 15 Nov 2010 18:03:23 -0600 Rhys Rhaven r...@rhavenindustrys.com wrote:

 
 Looks like the bug that I posted, which I believe Jean-Louis sent the 
 patch for. I've been unable to test it as of yet.

I compared my logs with your logs and it is much the same.

I cannot find the patch you mentioned in the E-Mail archives.  Can you
(or Jean-Louis) re-post it?

 
 On 11/15/2010 04:11 PM, Robert Heller wrote:
  At Mon, 15 Nov 2010 15:34:31 -0500 Jean-Louis 
  Martineau martin...@zmanda.com wrote:
 
 
  Because ravel:/mnt/RAID0 was not dumped.
  Check the email report to know why or the client debug file (on ravel)
   
  The E-Mail report just says:
 
  FAILURE DUMP SUMMARY:
 ravel /mnt/RAID0 RESULTS MISSING
 ravel /mnt/RAID0 lev 1  FAILED [can't dump in degraded mode]
 
 
  And there isn't a sendbackup debug file for /mnt/RAID0.  There are
  sendbackup debug files for the other file systems on ravel. There is a
  sendsize debug file for /mnt/RAID0 on ravel.
 
 
  Jean-Louis
 
  Robert Heller wrote:
   
  Using /var/log/amanda/60villagedrive/amdump.1
   From Mon Nov 15 00:10:02 EST 2010
 
  bach:/   183m finished (0:17:59)
  haydn:/  147m finished (0:20:35)
  ravel:/  189m finished (0:20:36)
  ravel:/mnt/RAID0 1   816m failed: process terminated while waiting 
  for dumping
  ravel:/mnt/sdb2  1 0m finished (0:20:34)
  ..
 
  This is in a CentOS 5.5 system:
 
  -sh-3.2$ rpm -qa amanda\* perl dump
  amanda-backup_server-3.2.0-1.rhel5
  dump-0.4b41-4.el5
  perl-5.8.8-32.el5_5.2
 
 
 
 
 
 
   
 
 
   

 

-- 
Robert Heller -- 978-544-6933 / hel...@deepsoft.com
Deepwoods Software-- http://www.deepsoft.com/
()  ascii ribbon campaign -- against html e-mail
/\  www.asciiribbon.org   -- against proprietary attachments


  


Re: What causes this error?

2010-11-16 Thread Jean-Louis Martineau

I attach the patch that was committed.

Jean-Louis

Robert Heller wrote:

At Mon, 15 Nov 2010 18:03:23 -0600 Rhys Rhaven r...@rhavenindustrys.com wrote:

  
Looks like the bug that I posted, which I believe Jean-Louis sent the 
patch for. I've been unable to test it as of yet.



I compared my logs with your logs and it is much the same.

I cannot find the patch you mentioned in the E-Mail archives.  Can you
(or Jean-Louis) re-post it?
  


Index: server-src/driver.c
===================================================================
--- server-src/driver.c	(revision 3607)
+++ server-src/driver.c	(revision 3608)
@@ -499,32 +499,10 @@
 	headqueue_disk(directq, diskp);
 }
 
-/* handle any remaining dumps by dumping directly to tape, if possible */
-while(!empty(directq) && taper_fd > 0) {
-	time_t  sleep_time  = 1;
-	disk_t *sleep_diskp = NULL;
-	time_t  now = time(0);
+/* log error for any remaining dumps */
+while(!empty(directq)) {
+	diskp = dequeue_disk(&directq);
 
-	/* Find one we can do immediately or the sonner */
-	for (diskp = directq.head; diskp != NULL; diskp = diskp->next) {
-	if (diskp->to_holdingdisk == HOLD_REQUIRED ||
-		degraded_mode) {
-		sleep_time = 0;
-		sleep_diskp = diskp;
-	} else if (diskp->host->start_t - now < sleep_time &&
-		   diskp->start_t -now < sleep_time) {
-		if (diskp->host->start_t > diskp->start_t)
-		sleep_time = diskp->host->start_t - now;
-		else
-		sleep_time = diskp->start_t - now;
-		sleep_diskp = diskp;
-	}
-	}
-	diskp = sleep_diskp;
-	if (sleep_time > 0)
-	sleep(sleep_time);
-	remove_disk(&directq, diskp);
-
 	if (diskp->to_holdingdisk == HOLD_REQUIRED) {
 	char *qname = quote_string(diskp->name);
 	log_add(L_FAIL, "%s %s %s %d [%s]",
@@ -538,7 +516,7 @@
 	log_add(L_FAIL, "%s %s %s %d [%s]",
 		diskp->host->hostname, qname, sched(diskp)->datestamp,
 		sched(diskp)->level,
-		_("can't dump in degraded mode"));
+		_("can't dump in non degraded mode"));
 	amfree(qname);
 	}
 	else {
@@ -555,10 +533,6 @@
 	}
 }
 
-/* fill up the tape or start new one for taperflush */
-startaflush();
-event_loop(0);
-
 short_dump_state();/* for amstatus */
 
 g_printf(_("driver: QUITTING time %s telling children to quit\n"),
@@ -1044,6 +1018,7 @@
 		enqueue_disk(&directq, diskp);
 		diskp->to_holdingdisk = HOLD_NEVER;
 	}
+	if (empty(*rq)) force_flush = 1;
 	}
 } else if (client_constrained(diskp)) {
 	free_assignedhd(holdp);
@@ -1326,6 +1301,7 @@
 		dumper_cmd(dumper, PORT_DUMP, diskp, NULL);
 	}
 	diskp->host->start_t = now + 15;
+	if (empty(*rq)) force_flush = 1;
 
 	if (result_argv)
 		g_strfreev(result_argv);
@@ -2114,6 +2090,7 @@
 dp->inprogress = 0;
 deallocate_bandwidth(dp->host->netif, sched(dp)->est_kps);
 taper->dumper = NULL;
+taper->disk = NULL;
 sched(dp)->dumper = NULL;
 sched(dp)->taper = NULL;
 start_some_dumps(&runq);
@@ -2759,7 +2736,7 @@
 	continue;
 	}
 
-	if(file.dumplevel < 0 || file.dumplevel > 9) {
+	if (file.dumplevel < 0 || file.dumplevel > 399) {
 	log_add(L_INFO, _("%s: ignoring file with bogus dump level %d."),
 		destname, file.dumplevel);
 	amfree(destname);
@@ -2825,6 +2802,8 @@
 if (!nodump) {
 	schedule_ev_read = event_register((event_id_t)0, EV_READFD,
 	  read_schedule, NULL);
+} else {
+	force_flush = 1;
 }
 }
 
@@ -3137,6 +3116,7 @@
 	log_add(L_WARNING, _("WARNING: got empty schedule from planner"));
 if(need_degraded==1) start_degraded_mode(&runq);
 schedule_done = 1;
+if (empty(runq)) force_flush = 1;
 start_some_dumps(&runq);
 startaflush();
 }
@@ -3805,13 +3785,9 @@
 }
 
 // when to start a flush
-// We don't start a flush if taper_tape_started == 1 && dump_to_disk_terminated && force_flush == 0,
-// it is a criteria need to exit the first event_loop without flushing everything to tape,
-// they will be flush in another event_loop.
 if (taper->state & TAPER_STATE_IDLE) {
 	if (!degraded_mode && (!empty(tapeq) || !empty(directq)) &&
-	(((taper->state & TAPER_STATE_TAPE_STARTED) &&
-	  force_flush == 1) ||// if tape already started and force_flush
+	(taper->state & TAPER_STATE_TAPE_STARTED ||		// tape already started
  !empty(roomq) ||	// holding disk constraint
  idle_reason == IDLE_NO_DISKSPACE ||		// holding disk constraint
  (my_flush_threshold_dumped < tapeq_size &&		// flush-threshold-dumped &&


What causes this error?

2010-11-15 Thread Robert Heller
Using /var/log/amanda/60villagedrive/amdump.1
From Mon Nov 15 00:10:02 EST 2010

bach:/   183m finished (0:17:59)
haydn:/  147m finished (0:20:35)
ravel:/  189m finished (0:20:36)
ravel:/mnt/RAID0 1   816m failed: process terminated while waiting for 
dumping 
ravel:/mnt/sdb2  1 0m finished (0:20:34)
..

This is in a CentOS 5.5 system:

-sh-3.2$ rpm -qa amanda\* perl dump 
amanda-backup_server-3.2.0-1.rhel5
dump-0.4b41-4.el5
perl-5.8.8-32.el5_5.2



-- 
Robert Heller -- 978-544-6933 / hel...@deepsoft.com
Deepwoods Software-- http://www.deepsoft.com/
()  ascii ribbon campaign -- against html e-mail
/\  www.asciiribbon.org   -- against proprietary attachments


  


Re: What causes this error?

2010-11-15 Thread Jean-Louis Martineau

Because ravel:/mnt/RAID0 was not dumped.
Check the email report to know why or the client debug file (on ravel)

Jean-Louis

Robert Heller wrote:

Using /var/log/amanda/60villagedrive/amdump.1
From Mon Nov 15 00:10:02 EST 2010

bach:/   183m finished (0:17:59)
haydn:/  147m finished (0:20:35)
ravel:/  189m finished (0:20:36)
ravel:/mnt/RAID0 1   816m failed: process terminated while waiting for dumping 
ravel:/mnt/sdb2  1 0m finished (0:20:34)

..

This is in a CentOS 5.5 system:

-sh-3.2$ rpm -qa amanda\* perl dump 
amanda-backup_server-3.2.0-1.rhel5

dump-0.4b41-4.el5
perl-5.8.8-32.el5_5.2



  




Re: What causes this error?

2010-11-15 Thread Robert Heller
At Mon, 15 Nov 2010 15:34:31 -0500 Jean-Louis Martineau martin...@zmanda.com 
wrote:

 
 Because ravel:/mnt/RAID0 was not dumped.
 Check the email report to know why or the client debug file (on ravel)

The E-Mail report just says:

FAILURE DUMP SUMMARY:
  ravel /mnt/RAID0 RESULTS MISSING
  ravel /mnt/RAID0 lev 1  FAILED [can't dump in degraded mode]


And there isn't a sendbackup debug file for /mnt/RAID0.  There are
sendbackup debug files for the other file systems on ravel. There is a
sendsize debug file for /mnt/RAID0 on ravel.

 
 Jean-Louis
 
 Robert Heller wrote:
  Using /var/log/amanda/60villagedrive/amdump.1
  From Mon Nov 15 00:10:02 EST 2010
 
  bach:/   183m finished (0:17:59)
  haydn:/  147m finished (0:20:35)
  ravel:/  189m finished (0:20:36)
  ravel:/mnt/RAID0 1   816m failed: process terminated while waiting for 
  dumping 
  ravel:/mnt/sdb2  1 0m finished (0:20:34)
  ..
 
  This is in a CentOS 5.5 system:
 
  -sh-3.2$ rpm -qa amanda\* perl dump 
  amanda-backup_server-3.2.0-1.rhel5
  dump-0.4b41-4.el5
  perl-5.8.8-32.el5_5.2
 
 
 

 


-- 
Robert Heller -- 978-544-6933 / hel...@deepsoft.com
Deepwoods Software-- http://www.deepsoft.com/
()  ascii ribbon campaign -- against html e-mail
/\  www.asciiribbon.org   -- against proprietary attachments






Re: What causes this error?

2010-11-15 Thread Rhys Rhaven
Looks like the bug that I posted, which I believe Jean-Louis sent the 
patch for. I've been unable to test it as of yet.


On 11/15/2010 04:11 PM, Robert Heller wrote:

At Mon, 15 Nov 2010 15:34:31 -0500 Jean-Louis Martineau martin...@zmanda.com 
wrote:

   

Because ravel:/mnt/RAID0 was not dumped.
Check the email report to know why or the client debug file (on ravel)
 

The E-Mail report just says:

FAILURE DUMP SUMMARY:
   ravel /mnt/RAID0 RESULTS MISSING
   ravel /mnt/RAID0 lev 1  FAILED [can't dump in degraded mode]


And there isn't a sendbackup debug file for /mnt/RAID0.  There are
sendbackup debug files for the other file systems on ravel. There is a
sendsize debug file for /mnt/RAID0 on ravel.

   

Jean-Louis

Robert Heller wrote:
 

Using /var/log/amanda/60villagedrive/amdump.1
 From Mon Nov 15 00:10:02 EST 2010

bach:/   183m finished (0:17:59)
haydn:/  147m finished (0:20:35)
ravel:/  189m finished (0:20:36)
ravel:/mnt/RAID0 1   816m failed: process terminated while waiting for 
dumping
ravel:/mnt/sdb2  1 0m finished (0:20:34)
..

This is in a CentOS 5.5 system:

-sh-3.2$ rpm -qa amanda\* perl dump
amanda-backup_server-3.2.0-1.rhel5
dump-0.4b41-4.el5
perl-5.8.8-32.el5_5.2