Where I work, we recently switched from manually triggered vaulting to
automatic vaulting using the vault-storage, vault, and dump-selection
options. Things appear to be working correctly, but we keep getting
some odd non-fatal error messages (that might be bogus as well, since
I've verified the dumps mentioned restore correctly) in the amdump
e-mails. I've been trying to figure out these 'errors' for the past
few weeks now, and I'm hoping someone on the list might have some advice
(or better yet, might recognize the symptoms and know how to fix them).
In our configuration, we have three different backup sets (each is on
its own schedule). Of these, two are consistently showing the following
error in the amdump e-mail report (I've redacted hostnames and exact paths;
note that the second path listed is a parent directory of the first):
taper: FATAL Header of dumpfile does not match command from driver 0 XXXXXXX
/home/XXXXXXXXXXXXXXXXX 20171031074642 ------ 0 XXXXXXX /home/XXXXXX
20171031074642 at /usr/lib64/perl5/vendor_perl/5.24.1/Amanda/Taper/Worker.pm
line 1168
For a given backup set, the particular hostname and paths are always the
same, but the backup appears to get taped correctly, and restores
correctly as well.
With the third backup set, we're regularly seeing things like the
following in the dump summary section, but no other visible error
messages:
                                        DUMPER STATS   TAPER STATS
HOSTNAME   DISK  L ORIG-KB OUT-KB COMP% MMM:SS   KB/s MMM:SS  KB/s
--------------------------------------------- ---------------------- ---------------- ---------------
XXXXXXXXXX /boot 0                   -- FAILED
XXXXXXXXXX /boot 1      10     10    --   0:00  168.8   0:00   0.0
In this case, the particular DLEs affected are always the same,
and the first line that claims a failure always shows dump level
zero, even when the backup is supposed to be at another level.
Just like the other error, the affected dumps always restore
correctly when tested, and get correctly vaulted as well. The
affected DLEs are only on Linux systems, but the problem does not
seem to care what distro or Amanda version is being used (it has
affected Debian, Gentoo, and Fedora systems, and covers 5 different
Amanda client versions). They are invariably small (sub-gigabyte)
filesystems, but I've not found any other commonality among them.
All three sets use essentially the same amanda.conf file (the
differences are literally just in when they get run), which
I've attached in-line at the end of this e-mail with
sensitive data redacted. The thing I find particularly odd is
that this config is essentially identical to what I use on my
personal systems, which are not exhibiting either problem.
8<------------------------------------------------------------
# Global settings: report identity, dump user, parallelism and ordering.
org "XXXXX"
mailto "admin"
dumpuser "amanda"
inparallel 2
dumporder "Ss"
taperalgo largestfit
displayunit "k"
netusage 8000000 Kbps
# Scheduling: 28 runs per 4-week dump cycle, with a tapecycle of 128 tapes.
dumpcycle 4 weeks
runspercycle 28
tapecycle 128 tapes
bumppercent 20
bumpdays 2
# Timeouts (presumably in seconds -- confirm against amanda.conf(5)).
etimeout 900
dtimeout 1800
ctimeout 30
device_output_buffer_size 256M
compress-index no
# Flush policy: every flush threshold is 0 and autoflush is enabled.
flush-threshold-dumped 0
flush-threshold-scheduled 0
taperflush 0
autoflush yes
# Up to 16 tapes may be used per run.
runtapes 16
# Changer "vtl": a chg-disk changer rooted at a /net/... path, configured
# with 128 slots and automatic slot creation.
define changer vtl {
tapedev "chg-disk:/net/XXXXXXXXXXXXXXXXXX/amanda/XXXXX"
changerfile "/etc/amanda/XXXXX/changer"
property "num-slot" "128"
property "auto-create-slot" "yes"
}
# Changer "aws": a chg-multi changer over 128 S3 device names
# (slot-0 through slot-127, enumerated via brace alternatives).
# Each statement is kept on one line: a line break between a keyword and its
# value ends the statement early, so the wrapped `tapedev` and
# `S3_SECRET_KEY` entries have been rejoined.
define changer aws {
tapedev "chg-multi:s3:XXXXXXXXXXXXXXXX/slot-{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127}"
changerfile "/etc/amanda/XXXXX/s3-changer"
# S3 device properties: SSL on, credentials (redacted), multi-part upload on,
# bucket creation off, explicit bucket location, AWS4 storage API.
device-property "S3_SSL" "YES"
device-property "S3_ACCESS_KEY" "XXXXXXXXXXXXXXXXXXXX"
device-property "S3_SECRET_KEY" "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
device-property "S3_MULTI_PART_UPLOAD" "YES"
device-property "CREATE_BUCKET" "NO"
device-property "S3_BUCKET_LOCATION" "XXXXXXXXX"
device-property "STORAGE_API" "AWS4"
}
# Storage "local-vtl": writes through the "vtl" changer using tapetype V64G.
# Volumes are auto-labelled from the "XXXX-%%%" template and must match labelstr.
define storage local-vtl {
tpchanger "vtl"
tapepool "$r"
tapetype "V64G"
labelstr "^XXXX-[0-9][0-9]*$"
autolabel "XXXX-%%%" any
erase-on-full YES
erase-on-failure YES
# Names the "cloud" storage as the vault target; the trailing 0 is presumably
# the delay in days before vaulting -- confirm against amanda.conf(5).
vault cloud 0
}
# Storage "cloud": writes through the "aws" changer using tapetype S3TAPE.
# Volumes are auto-labelled from the "Vault-XXXX-%%%" template and must match labelstr.
define storage cloud {
tpchanger "aws"
tapepool "$r"
tapetype "S3TAPE"
labelstr "^Vault-XXXX-[0-9][0-9]*$"
autolabel "Vault-XXXX-%%%" any
erase-on-full YES
erase-on-failure YES
# NOTE(review): presumably limits this storage to full dumps of all DLEs --
# confirm the dump-selection semantics against amanda.conf(5).
dump-selection ALL FULL
}
# Active storages: "local-vtl" for regular runs, "cloud" as the vault storage.
storage "local-vtl"
vault-storage "cloud"
maxdumps 4
maxdumpsize -1
# Recovery uses the "vtl" changer.
amrecover_changer "vtl"
# Holding disk "hd1": up to 128 Gb under /var/lib/amanda/..., with a 1Gb chunksize.
holdingdisk hd1 {
comment "main holding disk"
directory "/var/lib/amanda/XXXXX"
use 128 Gb
chunksize 1Gb
}
# Locations of the info database, logs, index files and tapelist.
# Note that infofile, logdir and tapelist live under /etc/amanda/..., while
# indexdir lives under /var/lib/amanda/....
infofile "/etc/amanda/XXXXX/curinfo"
logdir "/etc/amanda/XXXXX"
indexdir "/var/lib/amanda/XXXXX/index"
tapelist "/etc/amanda/XXXXX/tapelist"
# Tapetype "V64G": 65536 MB volumes, split into 1G parts cached in memory.
define tapetype V64G {
length 65536 MB
part-size 1G
part-cache-type memory
}
# Tapetype "S3TAPE": 2048 GB volumes, split into 1G parts cached in memory.
define tapetype S3TAPE {
length 2048 GB
part-size 1G
part-cache-type memory
}
# Application "amgtar": the amgtar plugin with two extra "ignore" patterns
# appended (files changed or removed while being read) and CHECK-DEVICE set to NO.
define application amgtar {
plugin "amgtar"
comment "amgtar"
property append "ignore" "file changed as we read it$"
property append "ignore" "File removed before we read it$"
property "CHECK-DEVICE" "NO"
}
# Dumptype chain: global -> root-tar -> high-tar -> remote-high -> remote-low.
# Each definition names its parent first and then sets its own values.

# global: indexing on, per-directory exclude list, fast client-side compression.
define dumptype global {
comment "Global definitions"
index yes
exclude list ".amanda.excludes"
compress client fast
}
# root-tar: based on "global"; uses the "amgtar" application, sets
# compression to none and priority to low.
define dumptype root-tar {
global
program "APPLICATION"
application "amgtar"
comment "root partitions dumped with tar"
compress none
# NOTE(review): bare `index` with no value -- presumably treated as enabled; confirm.
index
priority low
}
# high-tar: based on "root-tar" with priority set to high.
define dumptype high-tar {
root-tar
comment "partitions dumped with tar"
priority high
}
# remote-high: based on "high-tar"; ssh authentication, calcsize estimates,
# up to 4 dumps, and server-side custom compression through /usr/bin/zstd.
define dumptype remote-high {
high-tar
auth "ssh"
ssh_keys "/XXXXXXXXXXXXXXXXXXX"
estimate calcsize
maxdumps 4
compress server custom
server-custom-compress "/usr/bin/zstd"
}
# remote-low: based on "remote-high" with priority set to low.
define dumptype remote-low {
remote-high
priority low
}
# Interactivity definitions: three are declared (tty, email, tty_email);
# the final line selects "inter_tty_email".
define interactivity inter_tty {
plugin "tty"
}
# Email plugin: mails "admin1" and watches /tmp/email_input for a reply.
define interactivity inter_email {
plugin "email"
property "mailto" "admin1"
property "resend-delay" "10"
property "check-file" "/tmp/email_input"
property "check-file-delay" "10"
}
# tty_email plugin: same property values as inter_email.
define interactivity inter_tty_email {
plugin "tty_email"
property "mailto" "admin1"
property "resend-delay" "10"
property "check-file" "/tmp/email_input"
property "check-file-delay" "10"
}
interactivity "inter_tty_email"
# Taperscan definitions: three are declared (traditional, oldest, lexical);
# the final line selects "taper_lexical".
define taperscan taper_traditional {
comment "traditional"
plugin "traditional"
}
define taperscan taper_oldest {
comment "oldest"
plugin "oldest"
}
define taperscan taper_lexical {
comment "lexical"
plugin "lexical"
}
taperscan "taper_lexical"