In other words, if the user's chosen taper algorithm can't find any dumps it likes, Amanda just goes with the first tape-ready dump on its list, regardless of whether there's enough tape to hold it.
I suppose it's trying to keep the tape drive busy, and crossing its fingers that the first dump will fit anyway (e.g. if the tapetype underestimates the tape length). But if the first dump doesn't fit, both the rest of the tape in question, and the time to write it, have been wasted. I can think of a few things it could do that might be better than picking the first dump in its list: 1. Choose the *smallest* dump that's ready for taping -- if any "too-large" dump is going to fit, that's the one. This might only make sense for FIRSTFIT and LARGESTFIT; the other algorithms don't care whether the chosen dump is going to fit, so if they haven't found any they like, there's a different reason for it.
2. Just eject the tape immediately, and go on to the next one; the space will still have been wasted, but not all that writing time. Maybe this only makes sense if we know that this run has more tapes available; if we're on the run's final tape, a last-ditch effort to fill it up doesn't cost anything.
3. As Orion suggested, wait for more dumps to become available, in the hope that one will be small enough. It might make sense to keep waiting, if necessary, until all dumps have finished, but that risks filling the holding disk, so perhaps it should only wait for a limited time, or until the holding disk fills to some threshold. Better still, this decision should be user-configurable.
This was discussed in amanda-users a few months ago. The thread starts here: http://groups.yahoo.com/group/amanda-users/message/43375
I had some ideas then; at the risk of tooting my own horn a bit, here are pointers to them, for people reading amanda-hackers who don't also read amanda-users (stupid yahoogroups trashed the indentation): http://groups.yahoo.com/group/amanda-users/message/43429 http://groups.yahoo.com/group/amanda-users/message/43437
Thanks for the pointers to past discussions. My quick and dirty attempt at a solution (yet to be tested) is a "taperwait" configuration variable that makes the taper wait until all of the dumps have completed before writing to tape. This works in my case because I have a fast tape drive. I've attached a patch that contains the taperwait implementation, as well as the empty(tapeq) change mentioned in another email (it's the last entry in the patch). A better solution would be to wait until the "largestfit" dump has completed. I might look into that. One way might be to create a taper queue ordered by the taper algorithm and force the taper to write in that order, waiting if necessary for dumps to complete.
However, once again it seems the best solution to most of these problems is splitting DLEs across multiple tapes. Can someone give an update on current efforts in this direction, if any? I am seriously considering devoting time to this...
-- Orion Poplawski System Administrator 303-415-9701 x222 Colorado Research Associates/NWRA FAX: 303-415-9702 3380 Mitchell Lane, Boulder CO 80301 http://www.co-ra.com
*** amanda-2.4.4p1/server-src/conffile.h.orig 2003-08-11 16:45:18.000000000 -0600
--- amanda-2.4.4p1/server-src/conffile.h 2003-08-11 16:46:16.000000000 -0600
***************
*** 80,86 ****
CNF_AMRECOVER_DO_FSF,
CNF_AMRECOVER_CHECK_LABEL,
CNF_AMRECOVER_CHANGER,
! CNF_TAPERALGO
} confparm_t;
typedef enum auth_e {
--- 80,87 ----
CNF_AMRECOVER_DO_FSF,
CNF_AMRECOVER_CHECK_LABEL,
CNF_AMRECOVER_CHANGER,
! CNF_TAPERALGO,
! CNF_TAPERWAIT
} confparm_t;
typedef enum auth_e {
*** amanda-2.4.4p1/server-src/conffile.c.orig 2003-08-11 16:45:23.000000000 -0600
--- amanda-2.4.4p1/server-src/conffile.c 2003-08-11 16:51:08.000000000 -0600
***************
*** 81,86 ****
--- 81,87 ----
AMRECOVER_DO_FSF, AMRECOVER_CHECK_LABEL, AMRECOVER_CHANGER,
TAPERALGO, FIRST, FIRSTFIT, LARGEST, LARGESTFIT, SMALLEST, LAST,
+ TAPERWAIT,
/* holding disk */
COMMENT, DIRECTORY, USE, CHUNKSIZE,
***************
*** 210,215 ****
--- 211,217 ----
static val_t conf_amrecover_do_fsf;
static val_t conf_amrecover_check_label;
static val_t conf_taperalgo;
+ static val_t conf_taperwait;
/* reals */
static val_t conf_bumpmult;
***************
*** 266,271 ****
--- 268,274 ----
static int seen_amrecover_check_label;
static int seen_amrecover_changer;
static int seen_taperalgo;
+ static int seen_taperwait;
static int allow_overwrites;
static int token_pushed;
***************
*** 412,417 ****
--- 415,421 ----
{ "AMRECOVER_CHECK_LABEL", CNF_AMRECOVER_CHECK_LABEL, BOOL },
{ "AMRECOVER_CHANGER", CNF_AMRECOVER_CHANGER, STRING },
{ "TAPERALGO", CNF_TAPERALGO, INT },
+ { "TAPERWAIT", CNF_TAPERWAIT, BOOL },
{ "AUTOFLUSH", CNF_AUTOFLUSH, BOOL },
{ "RESERVE", CNF_RESERVE, INT },
{ "MAXDUMPSIZE", CNF_MAXDUMPSIZE, INT },
***************
*** 498,503 ****
--- 502,508 ----
case CNF_AMRECOVER_CHECK_LABEL: return seen_amrecover_check_label;
case CNF_AMRECOVER_CHANGER: return seen_amrecover_changer;
case CNF_TAPERALGO: return seen_taperalgo;
+ case CNF_TAPERWAIT: return seen_taperwait;
default: return 0;
}
}
***************
*** 530,535 ****
--- 535,541 ----
case CNF_AMRECOVER_DO_FSF: r = conf_amrecover_do_fsf.i; break;
case CNF_AMRECOVER_CHECK_LABEL: r = conf_amrecover_check_label.i; break;
case CNF_TAPERALGO: r = conf_taperalgo.i; break;
+ case CNF_TAPERWAIT: r = conf_taperwait.i; break;
default:
error("error [unknown getconf_int parm: %d]", parm);
***************
*** 724,729 ****
--- 730,736 ----
conf_amrecover_do_fsf.i = 0;
conf_amrecover_check_label.i = 0;
conf_taperalgo.i = 0;
+ conf_taperwait.i = 0;
/* defaults for internal variables */
***************
*** 770,775 ****
--- 777,783 ----
seen_amrecover_check_label = 0;
seen_amrecover_changer = 0;
seen_taperalgo = 0;
+ seen_taperwait = 0;
line_num = got_parserror = 0;
allow_overwrites = 0;
token_pushed = 0;
***************
*** 947,952 ****
--- 955,961 ----
{ "AMRECOVER_CHECK_LABEL", AMRECOVER_CHECK_LABEL },
{ "AMRECOVER_CHANGER", AMRECOVER_CHANGER },
{ "TAPERALGO", TAPERALGO },
+ { "TAPERWAIT", TAPERWAIT },
{ NULL, IDENT }
};
***************
*** 1068,1073 ****
--- 1077,1083 ----
case AMRECOVER_CHANGER:
get_simple(&conf_amrecover_changer,&seen_amrecover_changer, STRING); break;
case TAPERALGO: get_taperalgo(&conf_taperalgo,&seen_taperalgo); break;
+ case TAPERWAIT: get_simple(&conf_taperwait, &seen_taperwait, BOOL); break;
case LOGFILE: /* XXX - historical */
/* truncate the filename part and pretend he said "logdir" */
***************
*** 2879,2884 ****
--- 2889,2895 ----
printf("conf_amrecover_check_label = %d\n",
getconf_int(CNF_AMRECOVER_CHECK_LABEL));
printf("conf_amrecover_changer = \"%s\"\n", getconf_str(CNF_AMRECOVER_CHANGER));
printf("conf_taperalgo = %s\n", taperalgo2str(getconf_int(CNF_TAPERALGO)));
+ printf("conf_taperwait = %d\n", getconf_int(CNF_TAPERWAIT));
/*printf("conf_diskdir = \"%s\"\n", getconf_str(CNF_DISKDIR));*/
/*printf("conf_disksize = %d\n", getconf_int(CNF_DISKSIZE));*/
*** amanda-2.4.4p1/server-src/driver.c.orig 2003-06-05 09:48:57.000000000 -0600
--- amanda-2.4.4p1/server-src/driver.c 2003-08-12 09:41:55.000000000 -0600
***************
*** 56,62 ****
int inparallel;
int nodump = 0;
long tape_length, tape_left = 0;
! int conf_taperalgo;
host_t *flushhost = NULL;
int client_constrained P((disk_t *dp));
--- 56,62 ----
int inparallel;
int nodump = 0;
long tape_length, tape_left = 0;
! int conf_taperalgo, conf_taperwait;
host_t *flushhost = NULL;
int client_constrained P((disk_t *dp));
***************
*** 209,214 ****
--- 209,215 ----
NULL);
conf_taperalgo = getconf_int(CNF_TAPERALGO);
+ conf_taperwait = getconf_int(CNF_TAPERWAIT);
conf_tapetype = getconf_str(CNF_TAPETYPE);
tape = lookup_tapetype(conf_tapetype);
tape_length = tape->length;
***************
*** 318,325 ****
printf("driver: start time %s inparallel %d bandwidth %d diskspace %lu",
walltime_str(curclock()), inparallel, free_kps((interface_t *)0),
free_space());
! printf(" dir %s datestamp %s driver: drain-ends tapeq %s big-dumpers %s\n",
! "OBSOLETE", datestamp, taperalgo2str(conf_taperalgo),
getconf_str(CNF_DUMPORDER));
fflush(stdout);
--- 319,326 ----
printf("driver: start time %s inparallel %d bandwidth %d diskspace %lu",
walltime_str(curclock()), inparallel, free_kps((interface_t *)0),
free_space());
! printf(" dir %s datestamp %s driver: drain-ends tapeq %s taperwait %d big-dumpers %s\n",
! "OBSOLETE", datestamp, taperalgo2str(conf_taperalgo), conf_taperwait,
getconf_str(CNF_DUMPORDER));
fflush(stdout);
***************
*** 491,497 ****
disk_t *fit = NULL;
char *datestamp;
! if(!degraded_mode && !taper_busy && !empty(tapeq)) {
datestamp = sched(tapeq.head)->datestamp;
switch(conf_taperalgo) {
case ALGO_FIRST:
--- 492,498 ----
disk_t *fit = NULL;
char *datestamp;
! if(!degraded_mode && !taper_busy && !empty(tapeq) && !(conf_taperwait &&
some_dumps_in_progress())) {
datestamp = sched(tapeq.head)->datestamp;
switch(conf_taperalgo) {
case ALGO_FIRST:
***************
*** 533,538 ****
--- 534,543 ----
fit = fit->next;
}
if(dp) remove_disk(&tapeq, dp);
+ else if(some_dumps_in_progress()) {
+ fprintf(stderr, "driver: startaflush: waiting because nothing fit\n");
+ return;
+ }
break;
case ALGO_SMALLEST:
fit = dp = tapeq.head;
***************
*** 559,566 ****
taper_busy = 1;
taper_cmd(FILE_WRITE, dp, sched(dp)->destname, sched(dp)->level,
sched(dp)->datestamp);
! fprintf(stderr,"driver: startaflush: %s %s %s %ld %ld\n",
! taperalgo2str(conf_taperalgo), dp->host->hostname,
dp->name, sched(taper_disk)->act_size, tape_left);
tape_left -= sched(dp)->act_size;
}
--- 564,571 ----
taper_busy = 1;
taper_cmd(FILE_WRITE, dp, sched(dp)->destname, sched(dp)->level,
sched(dp)->datestamp);
! fprintf(stderr,"driver: startaflush: %s %d %s %s %ld %ld\n",
! taperalgo2str(conf_taperalgo), conf_taperwait, dp->host->hostname,
dp->name, sched(taper_disk)->act_size, tape_left);
tape_left -= sched(dp)->act_size;
}
***************
*** 887,893 ****
}
}
}
! if( !active_dumpers && busy_dumpers > 0 && !taper_busy && empty(tapeq) &&
pending_aborts == 0 ) { /* not case a */
if( busy_dumpers == 1 ) { /* case c */
sched(dp)->no_space = 1;
--- 892,898 ----
}
}
}
! if( !active_dumpers && busy_dumpers > 0 && !taper_busy &&
pending_aborts == 0 ) { /* not case a */
if( busy_dumpers == 1 ) { /* case c */
sched(dp)->no_space = 1;
