Hello Jean-Louis,
Two days of backups and no more disconnection errors... Will this patch be
included in the next version (v3.1.4)?
Thanks again!
On 2010-11-19, at 8:22 AM, Jean-Louis Martineau wrote:
> Bonjour Luc,
>
> It's a known bug in 3.1.3
> The attached patch fixes it.
>
> Jean-Louis
>
> Luc Lalonde wrote:
>> Hello Folks,
>>
>> Since I've upgraded from Amanda-2.5.2p1 to Amanda-3.1.x, I've been getting
>> connection errors almost every day...
>>
>> Here's a snippet of the type of errors I'm getting:
>>
>> ############### Begin ###########################
>> These dumps were to tape vtape-14.
>> The next tape Amanda expects to use is: vtape-15.
>> FAILURE DUMP SUMMARY:
>> chunker: FATAL startup_chunker failed: error accepting header stream:
>> Connection timed out
>> server1 /etc lev 1 FAILED [Can't open data output stream: Connection
>> refused]
>> server1 /etc lev 1 FAILED [error accepting data stream: Connection timed
>> out]
>> server1 /etc lev 1 was successfully retried
>> server2 /root lev 1 FAILED [port open: Connection refused]
>> server2 /root lev 1 was successfully retried
>> ############### End ###########################
>>
>> Here are my config files:
>>
>> ############### Begin amanda.conf ##############
>> inparallel 10
>> netusage 100 mbps
>> maxdumps 2
>>
>> dumpcycle 2 weeks
>> runspercycle 14
>> tapecycle 16 tapes
>>
>> bumpsize 20 Mb
>> bumpdays 1
>> bumpmult 4
>>
>> etimeout -21600
>> dtimeout 10800
>>
>> autoflush yes
>>
>> runtapes 1
>> tpchanger "chg-disk:/amanda/VTAPE/slots"
>>
>> tapetype HARDDISK
>> define tapetype HARDDISK {
>> length 204800 mbytes
>> }
>>
>> holdingdisk hd1 {
>> comment "main holding disk"
>> directory "/amanda/stage/Journalier-VTAPE"
>> use 1000 Gb
>> chunksize 1Gb
>> }
>>
>> columnspec
>> "Hostname=0:8,Disk=1:14,OrigKB=1:10,OutKB=1:10,DumpRate=1:7,TapeRate=1:7"
>>
>> infofile "/amanda/home/Journalier-VTAPE/curinfo"
>> logdir "/amanda/home/Journalier-VTAPE"
>> indexdir "/amanda/home/Journalier-VTAPE/index"
>>
>> # dumptypes
>>
>> define dumptype global {
>> comment "Global definitions"
>> index yes
>> record yes
>> auth "bsdtcp"
>> }
>>
>> define dumptype full-tar {
>> global
>> program "GNUTAR"
>> comment "Full tar of this filesystem always"
>> compress client fast
>> priority high
>> dumpcycle 0
>> }
>>
>> define dumptype exclude-tar {
>> global
>> program "GNUTAR"
>> comment "root partitions dumped with tar"
>> compress client fast
>> exclude list "/etc/amanda/exclude.gtar"
>> priority low
>> }
>>
>> define dumptype enseign-tar {
>> global
>> program "GNUTAR"
>> comment "root partitions dumped with tar"
>> compress client fast
>> priority low
>> }
>>
>> define dumptype server-estimate {
>> global
>> program "GNUTAR"
>> comment "root partitions dumped with tar"
>> compress client fast
>> estimate server
>> priority low
>> }
>>
>> define dumptype root-tar {
>> global
>> program "GNUTAR"
>> comment "root partitions dumped with tar"
>> compress client fast
>> priority low
>> }
>>
>>
>> # network interfaces
>>
>> define interface local {
>> comment "a local disk"
>> use 20 mbps
>> }
>>
>> define interface eth2 {
>> comment "1000 Mbps ethernet"
>> use 1000 mbps
>> }
>> ############### End amanda.conf ##############
>>
>> #### Begin /etc/xinetd.d/amandaserver #########
>> service amanda
>> {
>> disable = no
>> socket_type = stream
>> protocol = tcp
>> wait = no
>> user = amandabackup
>> group = disk
>> groups = yes
>> server = /usr/libexec/amanda/amandad
>> server_args = -auth=bsdtcp amdump amindexd amidxtaped
>> }
>> #### End /etc/xinetd.d/amandaserver #########
>>
>> #### Begin /etc/xinetd.d/amandaclient #########
>> service amanda
>> {
>> disable = no
>> socket_type = stream
>> protocol = tcp
>> wait = no
>> user = amandabackup
>> group = disk
>> groups = yes
>> server = /usr/libexec/amanda/amandad
>> server_args = -auth=bsdtcp amdump
>> }
>> #### End /etc/xinetd.d/amandaclient #########
>>
>> And finally, here's a snippet of a pertinent dumper debug:
>>
>> ############## begin dump debug #####################
>> Wed Nov 17 23:34:44 2010: dumper: security_streaminit(stream=0x1b64d110,
>> driver=0x3414c66520 (BSDTCP))
>> Wed Nov 17 23:34:44 2010: dumper: security_streaminit(stream=0x1b655170,
>> driver=0x3414c66520 (BSDTCP))
>> Wed Nov 17 23:34:44 2010: dumper: security_streaminit(stream=0x1b65d1d0,
>> driver=0x3414c66520 (BSDTCP))
>> Wed Nov 17 23:34:44 2010: dumper: security_close(handle=0x1b643760,
>> driver=0x3414c66520 (BSDTCP))
>> Wed Nov 17 23:34:44 2010: dumper: security_stream_close(0x1b644760)
>> Wed Nov 17 23:34:44 2010: dumper: Building type FILE header of 32768-32768
>> bytes with name='server1' disk='/etc' dumplevel=1 and blocksize=32768
>> Wed Nov 17 23:34:44 2010: dumper: Sending data to 127.0.0.1:11039
>> Wed Nov 17 23:34:44 2010: dumper: make_socket opening socket with family 2
>> Wed Nov 17 23:34:44 2010: dumper: connect_port: Try port 11004: available -
>> Address already in use
>> Wed Nov 17 23:34:44 2010: dumper: connect_port: Skip port 11000: owned by
>> irisa.
>> Wed Nov 17 23:34:44 2010: dumper: connect_port: Skip port 11001: owned by
>> metasys.
>> Wed Nov 17 23:34:44 2010: dumper: make_socket opening socket with family 2
>> Wed Nov 17 23:34:44 2010: dumper: connect_port: Try port 11002: available -
>> Success
>> Wed Nov 17 23:34:44 2010: dumper: connect_portrange: Connect from
>> 0.0.0.0.11002 failed: Connection refused
>> Wed Nov 17 23:34:44 2010: dumper: connect_portrange: connect to
>> 127.0.0.1.11039 failed: Connection refused
>> Wed Nov 17 23:34:44 2010: dumper: stream_client: Could not bind to port in
>> range 11000-11040.
>> Wed Nov 17 23:34:44 2010: dumper: security_stream_close(0x1b64d110)
>> Wed Nov 17 23:34:44 2010: dumper: security_stream_close(0x1b655170)
>> Wed Nov 17 23:34:44 2010: dumper: security_stream_close(0x1b65d1d0)
>> Wed Nov 17 23:34:44 2010: dumper: putresult: 10 FAILED
>> Wed Nov 17 23:39:45 2010: dumper: getcmd: PORT-DUMP 02-00036 11023 server1
>> ffffffff9efefbffffffffff01 /etc NODEVICE 1 2010:11:17:4:0:3 GNUTAR "" "" ""
>> "" bsdtcp AMANDA 12
>> 7.0.0.1:11024 |" <auth>bsdtcp</auth>\n <compress>FAST</compress>\n
>> <record>YES</record>\n <index>YES</index>\n <datapath>AMANDA</datapath>\n
>> <exclude>\n <lis
>> t>/etc/amanda/exclude.gtar</list>\n </exclude>\n"
>> Wed Nov 17 23:39:45 2010: dumper: Sending header to localhost:11023
>>
>> Wed Nov 17 23:39:45 2010: dumper: make_socket opening socket with family 2
>> Wed Nov 17 23:39:45 2010: dumper: connect_port: Try port 11004: available -
>> Success
>> Wed Nov 17 23:39:45 2010: dumper: connected to 127.0.0.1.11023
>> Wed Nov 17 23:39:45 2010: dumper: our side is 0.0.0.0.11004
>> Wed Nov 17 23:39:45 2010: dumper: try_socksize: send buffer size is 65536
>> Wed Nov 17 23:39:45 2010: dumper: send request:
>> ############## end dump debug #####################
>>
>> Anyone have ideas on how to resolve this problem?
>> Thanks!
>>
>
> Index: xfer-src/element-glue.c
> ===================================================================
> --- xfer-src/element-glue.c (revision 3580)
> +++ xfer-src/element-glue.c (working copy)
> @@ -153,10 +153,6 @@
> return -1;
> }
>
> - /* close the listening socket now, for good measure */
> - close(*socketp);
> - *socketp = -1;
> -
> return sock;
> }
>
> @@ -1071,10 +1067,10 @@
> /* close our pipes if they're still open (they shouldn't be!) */
> if (self->pipe[0] != -1) close(self->pipe[0]);
> if (self->pipe[1] != -1) close(self->pipe[1]);
> - if (self->input_listen_socket != -1) close(self->input_listen_socket);
> - if (self->output_listen_socket != -1) close(self->output_listen_socket);
> if (self->input_data_socket != -1) close(self->input_data_socket);
> if (self->output_data_socket != -1) close(self->output_data_socket);
> + if (self->input_listen_socket != -1) close(self->input_listen_socket);
> + if (self->output_listen_socket != -1) close(self->output_listen_socket);
>
> if (self->ring) {
> /* empty the ring buffer, ignoring syncronization issues */
> Index: server-src/chunker.c
> ===================================================================
> --- server-src/chunker.c (revision 3580)
> +++ server-src/chunker.c (working copy)
> @@ -102,8 +102,8 @@
> static void databuf_init(struct databuf *, int, char *, off_t, off_t);
> static int databuf_flush(struct databuf *);
>
> -static int startup_chunker(char *, off_t, off_t, struct databuf *, int *);
> -static int do_chunk(int, struct databuf *, int);
> +static int startup_chunker(char *, off_t, off_t, struct databuf *, int *,
> int *);
> +static int do_chunk(int, struct databuf *, int, int);
>
>
> int
> @@ -123,6 +123,7 @@
> config_overrides_t *cfg_ovr = NULL;
> char *cfg_opt = NULL;
> char *m;
> + int header_socket;
> int data_socket;
>
> /*
> @@ -296,13 +297,13 @@
> }
>
> if ((header_fd = startup_chunker(filename, use, chunksize, &db,
> - &data_socket)) < 0) {
> + &header_socket, &data_socket)) <
> 0) {
> q = quote_string(vstrallocf(_("[chunker startup failed: %s]"),
> errstr));
> putresult(TRYAGAIN, "%s %s\n", handle, q);
> error("startup_chunker failed: %s", errstr);
> }
> command_in_transit = NULL;
> - if (header_fd >= 0 && do_chunk(header_fd, &db, data_socket)) {
> + if (header_fd >= 0 && do_chunk(header_fd, &db, header_socket,
> data_socket)) {
> char kb_str[NUM_STR_SIZE];
> char kps_str[NUM_STR_SIZE];
> double rt;
> @@ -418,6 +419,7 @@
> off_t use,
> off_t chunksize,
> struct databuf * db,
> + int *headersocket,
> int *datasocket)
> {
> int header_fd, outfd;
> @@ -463,7 +465,6 @@
> aclose(data_socket);
> return -1;
> }
> - aclose(header_socket);
>
> tmp_filename = vstralloc(filename, ".tmp", NULL);
> pc = strrchr(tmp_filename, '/');
> @@ -493,6 +494,7 @@
> amfree(tmp_filename);
> databuf_init(db, outfd, filename, use, chunksize);
> db->filename_seq++;
> + *headersocket = header_socket;
> *datasocket = data_socket;
> return header_fd;
> }
> @@ -501,6 +503,7 @@
> do_chunk(
> int header_fd,
> struct databuf * db,
> + int header_socket,
> int data_socket)
> {
> size_t nread;
> @@ -519,6 +522,8 @@
> * chunk code will rewrite it.
> */
> nread = full_read(header_fd, header_buf, SIZEOF(header_buf));
> + aclose(header_fd);
> + aclose(header_socket);
> if (nread != sizeof(header_buf)) {
> if(errno != 0) {
> errstr = vstrallocf(_("cannot read header: %s"), strerror(errno));
> @@ -556,7 +561,6 @@
> aclose(data_socket);
> return 0;
> }
> - aclose(data_socket);
>
> /*
> * We've written the file header. Now, just write data until the
> @@ -567,12 +571,16 @@
> db->datain += nread;
> while(db->dataout < db->datain) {
> if(!databuf_flush(db)) {
> + aclose(data_fd);
> + aclose(data_socket);
> return 0;
> }
> }
> }
> while(db->dataout < db->datain) {
> if(!databuf_flush(db)) {
> + aclose(data_fd);
> + aclose(data_socket);
> return 0;
> }
> }
> @@ -580,6 +588,8 @@
> dumpsize += (off_t)1; /* count partial final KByte */
> filesize += (off_t)1;
> }
> + aclose(data_fd);
> + aclose(data_socket);
> return 1;
> }
>