Hello Jean-Louis,

Two days of backups and no more disconnection errors...  Will this patch be 
included in the next version (v3.1.4)?

Thanks again!

On 2010-11-19, at 8:22 AM, Jean-Louis Martineau wrote:

> Bonjour Luc,
> 
> It's a known bug in 3.1.3.
> The attached patch fixes it.
> 
> Jean-Louis
> 
> Luc Lalonde wrote:
>> Hello Folks,
>> 
>> Since I've upgraded from Amanda-2.5.2p1 to Amanda-3.1.x, I've been getting 
>> connection errors almost every day...
>> 
>> Here's a snippet of the type of errors I'm getting:
>> 
>> ############### Begin ###########################
>> These dumps were to tape vtape-14.
>> The next tape Amanda expects to use is: vtape-15.
>> FAILURE DUMP SUMMARY:
>>  chunker: FATAL startup_chunker failed: error accepting header stream: 
>> Connection timed out
>>  server1 /etc lev 1  FAILED [Can't open data output stream: Connection 
>> refused]
>>  server1 /etc lev 1  FAILED [error accepting data stream: Connection timed 
>> out]
>>  server1 /etc lev 1  was successfully retried
>>  server2 /root lev 1  FAILED [port open: Connection refused]
>>  server2 /root lev 1  was successfully retried
>> ############### End  ###########################
>> 
>> Here are my config files:
>> 
>> ############### Begin amanda.conf ##############
>> inparallel 10
>> netusage  100 mbps
>> maxdumps 2
>> 
>> dumpcycle 2 weeks
>> runspercycle 14 tapecycle 16 tapes
>> 
>> bumpsize 20 Mb
>> bumpdays 1
>> bumpmult 4
>> 
>> etimeout -21600
>> dtimeout 10800
>> 
>> autoflush yes
>> 
>> runtapes 1
>> tpchanger "chg-disk:/amanda/VTAPE/slots"
>> 
>> tapetype HARDDISK
>> define tapetype HARDDISK {
>> length 204800 mbytes
>> }
>> 
>> holdingdisk hd1 {
>>    comment "main holding disk"
>>    directory "/amanda/stage/Journalier-VTAPE"
>>    use 1000 Gb
>>    chunksize 1Gb
>>    }
>> 
>> columnspec 
>> "Hostname=0:8,Disk=1:14,OrigKB=1:10,OutKB=1:10,DumpRate=1:7,TapeRate=1:7"
>> 
>> infofile "/amanda/home/Journalier-VTAPE/curinfo"
>> logdir   "/amanda/home/Journalier-VTAPE"
>> indexdir "/amanda/home/Journalier-VTAPE/index"
>> 
>> # dumptypes
>> 
>> define dumptype global {
>>        comment "Global definitions"
>>        index yes
>>        record yes
>>        auth "bsdtcp"
>> }
>> 
>> define dumptype full-tar {
>>        global
>>        program "GNUTAR"
>>        comment "Full tar of this filesystem always"
>>        compress client fast
>>        priority high
>>        dumpcycle 0
>> }
>> 
>> define dumptype exclude-tar {
>>    global
>>    program "GNUTAR"
>>    comment "root partitions dumped with tar"
>>    compress client fast
>>    exclude list "/etc/amanda/exclude.gtar"
>>    priority low
>> }
>> 
>> define dumptype enseign-tar {
>>    global
>>    program "GNUTAR"
>>    comment "root partitions dumped with tar"
>>    compress client fast
>>    priority low
>> }
>> 
>> define dumptype server-estimate {
>>    global
>>    program "GNUTAR"
>>    comment "root partitions dumped with tar"
>>    compress client fast
>>    estimate server
>>    priority low
>> }
>> 
>> define dumptype root-tar {
>>    global
>>    program "GNUTAR"
>>    comment "root partitions dumped with tar"
>>    compress client fast
>>    priority low
>> }
>> 
>> 
>> # network interfaces
>> 
>> define interface local {
>>    comment "a local disk"
>>    use 20 mbps
>> }
>> 
>> define interface eth2 {
>>    comment "1000 Mbps ethernet"
>>    use 1000 mbps
>> }
>> ############### End amanda.conf  ##############
>> 
>> #### Begin /etc/xinetd.d/amandaserver #########
>> service amanda
>> {
>>        disable         = no
>>        socket_type     = stream
>>        protocol        = tcp
>>        wait            = no
>>        user            = amandabackup
>>        group           = disk
>>        groups          = yes
>>        server          = /usr/libexec/amanda/amandad
>>        server_args     = -auth=bsdtcp amdump amindexd amidxtaped
>> }
>> #### End /etc/xinetd.d/amandaserver   #########
>> 
>> #### Begin /etc/xinetd.d/amandaclient #########
>> service amanda
>> {
>>        disable         = no
>>        socket_type     = stream
>>        protocol        = tcp
>>        wait            = no
>>        user            = amandabackup
>>        group           = disk
>>        groups          = yes
>>        server          = /usr/libexec/amanda/amandad
>>        server_args     = -auth=bsdtcp amdump
>> }
>> #### End /etc/xinetd.d/amandaclient  #########
>> 
>> And finally, here's a snippet of a pertinent dumper debug:
>> 
>> ############## begin dump debug #####################
>> Wed Nov 17 23:34:44 2010: dumper: security_streaminit(stream=0x1b64d110, 
>> driver=0x3414c66520 (BSDTCP))
>> Wed Nov 17 23:34:44 2010: dumper: security_streaminit(stream=0x1b655170, 
>> driver=0x3414c66520 (BSDTCP))
>> Wed Nov 17 23:34:44 2010: dumper: security_streaminit(stream=0x1b65d1d0, 
>> driver=0x3414c66520 (BSDTCP))
>> Wed Nov 17 23:34:44 2010: dumper: security_close(handle=0x1b643760, 
>> driver=0x3414c66520 (BSDTCP))
>> Wed Nov 17 23:34:44 2010: dumper: security_stream_close(0x1b644760)
>> Wed Nov 17 23:34:44 2010: dumper: Building type FILE header of 32768-32768 
>> bytes with name='server1' disk='/etc' dumplevel=1 and blocksize=32768
>> Wed Nov 17 23:34:44 2010: dumper: Sending data to 127.0.0.1:11039
>> Wed Nov 17 23:34:44 2010: dumper: make_socket opening socket with family 2
>> Wed Nov 17 23:34:44 2010: dumper: connect_port: Try  port 11004: available - 
>> Address already in use
>> Wed Nov 17 23:34:44 2010: dumper: connect_port: Skip port 11000: owned by 
>> irisa.
>> Wed Nov 17 23:34:44 2010: dumper: connect_port: Skip port 11001: owned by 
>> metasys.
>> Wed Nov 17 23:34:44 2010: dumper: make_socket opening socket with family 2
>> Wed Nov 17 23:34:44 2010: dumper: connect_port: Try  port 11002: available - 
>> Success
>> Wed Nov 17 23:34:44 2010: dumper: connect_portrange: Connect from 
>> 0.0.0.0.11002 failed: Connection refused
>> Wed Nov 17 23:34:44 2010: dumper: connect_portrange: connect to 
>> 127.0.0.1.11039 failed: Connection refused
>> Wed Nov 17 23:34:44 2010: dumper: stream_client: Could not bind to port in 
>> range 11000-11040.
>> Wed Nov 17 23:34:44 2010: dumper: security_stream_close(0x1b64d110)
>> Wed Nov 17 23:34:44 2010: dumper: security_stream_close(0x1b655170)
>> Wed Nov 17 23:34:44 2010: dumper: security_stream_close(0x1b65d1d0)
>> Wed Nov 17 23:34:44 2010: dumper: putresult: 10 FAILED
>> Wed Nov 17 23:39:45 2010: dumper: getcmd: PORT-DUMP 02-00036 11023 server1 
>> ffffffff9efefbffffffffff01 /etc NODEVICE 1 2010:11:17:4:0:3 GNUTAR "" "" "" 
>> "" bsdtcp AMANDA 12
>> 7.0.0.1:11024 |"  <auth>bsdtcp</auth>\n  <compress>FAST</compress>\n  
>> <record>YES</record>\n  <index>YES</index>\n  <datapath>AMANDA</datapath>\n  
>> <exclude>\n    <lis
>> t>/etc/amanda/exclude.gtar</list>\n  </exclude>\n"
>> Wed Nov 17 23:39:45 2010: dumper: Sending header to localhost:11023
>> 
>> Wed Nov 17 23:39:45 2010: dumper: make_socket opening socket with family 2
>> Wed Nov 17 23:39:45 2010: dumper: connect_port: Try  port 11004: available - 
>> Success
>> Wed Nov 17 23:39:45 2010: dumper: connected to 127.0.0.1.11023
>> Wed Nov 17 23:39:45 2010: dumper: our side is 0.0.0.0.11004
>> Wed Nov 17 23:39:45 2010: dumper: try_socksize: send buffer size is 65536
>> Wed Nov 17 23:39:45 2010: dumper: send request:
>> ############## end dump debug   #####################
>> 
>> Anyone have ideas on how to resolve this problem?  
>> Thanks!
>>  
> 
> Index: xfer-src/element-glue.c
> ===================================================================
> --- xfer-src/element-glue.c   (revision 3580)
> +++ xfer-src/element-glue.c   (working copy)
> @@ -153,10 +153,6 @@
>       return -1;
>     }
> 
> -    /* close the listening socket now, for good measure */
> -    close(*socketp);
> -    *socketp = -1;
> -
>     return sock;
> }
> 
> @@ -1071,10 +1067,10 @@
>     /* close our pipes if they're still open (they shouldn't be!) */
>     if (self->pipe[0] != -1) close(self->pipe[0]);
>     if (self->pipe[1] != -1) close(self->pipe[1]);
> -    if (self->input_listen_socket != -1) close(self->input_listen_socket);
> -    if (self->output_listen_socket != -1) close(self->output_listen_socket);
>     if (self->input_data_socket != -1) close(self->input_data_socket);
>     if (self->output_data_socket != -1) close(self->output_data_socket);
> +    if (self->input_listen_socket != -1) close(self->input_listen_socket);
> +    if (self->output_listen_socket != -1) close(self->output_listen_socket);
> 
>     if (self->ring) {
>       /* empty the ring buffer, ignoring syncronization issues */
> Index: server-src/chunker.c
> ===================================================================
> --- server-src/chunker.c      (revision 3580)
> +++ server-src/chunker.c      (working copy)
> @@ -102,8 +102,8 @@
> static void databuf_init(struct databuf *, int, char *, off_t, off_t);
> static int databuf_flush(struct databuf *);
> 
> -static int startup_chunker(char *, off_t, off_t, struct databuf *, int *);
> -static int do_chunk(int, struct databuf *, int);
> +static int startup_chunker(char *, off_t, off_t, struct databuf *, int *, 
> int *);
> +static int do_chunk(int, struct databuf *, int, int);
> 
> 
> int
> @@ -123,6 +123,7 @@
>     config_overrides_t *cfg_ovr = NULL;
>     char *cfg_opt = NULL;
>     char *m;
> +    int header_socket;
>     int data_socket;
> 
>     /*
> @@ -296,13 +297,13 @@
>           }
> 
>           if ((header_fd = startup_chunker(filename, use, chunksize, &db,
> -                                          &data_socket)) < 0) {
> +                                          &header_socket, &data_socket)) < 
> 0) {
>               q = quote_string(vstrallocf(_("[chunker startup failed: %s]"), 
> errstr));
>               putresult(TRYAGAIN, "%s %s\n", handle, q);
>               error("startup_chunker failed: %s", errstr);
>           }
>           command_in_transit = NULL;
> -         if (header_fd >= 0 && do_chunk(header_fd, &db, data_socket)) {
> +         if (header_fd >= 0 && do_chunk(header_fd, &db, header_socket, 
> data_socket)) {
>               char kb_str[NUM_STR_SIZE];
>               char kps_str[NUM_STR_SIZE];
>               double rt;
> @@ -418,6 +419,7 @@
>     off_t             use,
>     off_t             chunksize,
>     struct databuf *  db,
> +    int                *headersocket,
>     int                *datasocket)
> {
>     int header_fd, outfd;
> @@ -463,7 +465,6 @@
>       aclose(data_socket);
>       return -1;
>     }
> -    aclose(header_socket);
> 
>     tmp_filename = vstralloc(filename, ".tmp", NULL);
>     pc = strrchr(tmp_filename, '/');
> @@ -493,6 +494,7 @@
>     amfree(tmp_filename);
>     databuf_init(db, outfd, filename, use, chunksize);
>     db->filename_seq++;
> +    *headersocket = header_socket;
>     *datasocket = data_socket;
>     return header_fd;
> }
> @@ -501,6 +503,7 @@
> do_chunk(
>     int                       header_fd,
>     struct databuf *  db,
> +    int                 header_socket,
>     int                 data_socket)
> {
>     size_t nread;
> @@ -519,6 +522,8 @@
>      * chunk code will rewrite it.
>      */
>     nread = full_read(header_fd, header_buf, SIZEOF(header_buf));
> +    aclose(header_fd);
> +    aclose(header_socket);
>     if (nread != sizeof(header_buf)) {
>       if(errno != 0) {
>           errstr = vstrallocf(_("cannot read header: %s"), strerror(errno));
> @@ -556,7 +561,6 @@
>       aclose(data_socket);
>       return 0;
>     }
> -    aclose(data_socket);
> 
>     /*
>      * We've written the file header.  Now, just write data until the
> @@ -567,12 +571,16 @@
>       db->datain += nread;
>       while(db->dataout < db->datain) {
>           if(!databuf_flush(db)) {
> +             aclose(data_fd);
> +             aclose(data_socket);
>               return 0;
>           }
>       }
>     }
>     while(db->dataout < db->datain) {
>       if(!databuf_flush(db)) {
> +         aclose(data_fd);
> +         aclose(data_socket);
>           return 0;
>       }
>     }
> @@ -580,6 +588,8 @@
>       dumpsize += (off_t)1;                   /* count partial final KByte */
>       filesize += (off_t)1;
>     }
> +    aclose(data_fd);
> +    aclose(data_socket);
>     return 1;
> }
> 


Reply via email to