On  5 Jul, Georg-W. Koltermann wrote:
> Am Mi, 2002-07-03 um 17.31 schrieb David O'Brien:
>> On a 27-June-2002 23:02:00 UTC system (just before ipfw2 went in,
>> pre-KSE3), dump will not complete dumping more than 5GB.  At that point
>> it stops responding properly to ^T, which should give "DUMP: 47.52% done,
>> finished in 1:19".  At the 5GB mark, ^T gives:
>> 
>>     load: 0.00  cmd: dump 3981 [physstr] 2.11u 43.06s 0% 1536k
>> 
>> and never changes.  The user and system times never advance.  Anybody
>> have any ideas?
> 
> For me it is broken in a different way. For a small FS like / it works,
> but dumping my /home, which is 4G, I get 
> 
>      DUMP: read error from /dev/ad0s5e: Invalid argument: [sector -1054739789]: count=-1
>      DUMP: read error from /dev/ad0s5e: Invalid argument: [sector -1054739788]: count=-1
>      DUMP: read error from /dev/ad0s5e: Invalid argument: [sector -1054739787]: count=-1
>      DUMP: read error from /dev/ad0s5e: Invalid argument: [sector -1054739786]: count=-1
>     
> and on and on. 
> 
> Maybe a 32 bit <--> 64 bit mismatch caused by UFS2?  My -current is of
> date=2002.06.27.22.00.00.

I was finally able to reproduce this by creating a large file
before doing the dump.  Dump(8) is *very* hosed.  The UFS2 import broke
its ability to follow multiple levels of indirect blocks.

Here's a patch that fixes the problem, along with a bunch of printf format
mismatches:

Index: tape.c
===================================================================
RCS file: /home/ncvs/src/sbin/dump/tape.c,v
retrieving revision 1.20
diff -u -r1.20 tape.c
--- tape.c      21 Jun 2002 06:17:57 -0000      1.20
+++ tape.c      7 Jul 2002 03:56:31 -0000
@@ -204,7 +204,7 @@
                quit("Cannot recover\n");
                /* NOTREACHED */
        }
-       msg("write error %d blocks into volume %d\n", blocksthisvol, tapeno);
+       msg("write error %ld blocks into volume %d\n", blocksthisvol, tapeno);
        broadcast("DUMP WRITE ERROR!\n");
        if (!query("Do you want to restart?"))
                dumpabort(0);
Index: traverse.c
===================================================================
RCS file: /home/ncvs/src/sbin/dump/traverse.c,v
retrieving revision 1.19
diff -u -r1.19 traverse.c
--- traverse.c  21 Jun 2002 06:17:57 -0000      1.19
+++ traverse.c  7 Jul 2002 04:24:14 -0000
@@ -275,9 +275,9 @@
 {
        int ret = 0;
        int i;
-       static caddr_t idblk;
+       caddr_t idblk;
 
-       if (idblk == NULL && (idblk = malloc(sblock->fs_bsize)) == NULL)
+       if ((idblk = malloc(sblock->fs_bsize)) == NULL)
                quit("dirindir: cannot allocate indirect memory.\n");
        bread(fsbtodb(sblock, blkno), idblk, (int)sblock->fs_bsize);
        if (ind_level <= 0) {
@@ -294,6 +294,7 @@
                        else
                                *filesize -= sblock->fs_bsize;
                }
+               free(idblk);
                return (ret);
        }
        ind_level--;
@@ -306,6 +307,7 @@
                        ret |= dirindir(ino, blkno, ind_level, filesize,
                            tapesize, nodump);
        }
+       free(idblk);
        return (ret);
 }
 
@@ -501,9 +503,9 @@
 dmpindir(ino_t ino, ufs2_daddr_t blk, int ind_level, off_t *size)
 {
        int i, cnt;
-       static caddr_t idblk;
+       caddr_t idblk;
 
-       if (idblk == NULL && (idblk = malloc(sblock->fs_bsize)) == NULL)
+       if ((idblk = malloc(sblock->fs_bsize)) == NULL)
                quit("dmpindir: cannot allocate indirect memory.\n");
        if (blk != 0)
                bread(fsbtodb(sblock, blk), idblk, (int) sblock->fs_bsize);
@@ -519,6 +521,7 @@
                        ufs1_blksout((ufs1_daddr_t *)idblk, cnt, ino);
                else
                        ufs2_blksout((ufs2_daddr_t *)idblk, cnt, ino);
+               free(idblk);
                return;
        }
        ind_level--;
@@ -529,9 +532,12 @@
                else
                        dmpindir(ino, ((ufs2_daddr_t *)idblk)[i], ind_level,
                            size);
-               if (*size <= 0)
+               if (*size <= 0) {
+                       free(idblk);
                        return;
+               }
        }
+       free(idblk);
 }
 
 /*
@@ -705,13 +711,13 @@
                goto loop;
        }
        if (cnt == -1)
-               msg("read error from %s: %s: [block %d]: count=%d\n",
+               msg("read error from %s: %s: [block %qd]: count=%d\n",
                        disk, strerror(errno), blkno, size);
        else
-               msg("short read error from %s: [block %d]: count=%d, got=%d\n",
+               msg("short read error from %s: [block %qd]: count=%d, got=%d\n",
                        disk, blkno, size, cnt);
        if (++breaderrors > BREADEMAX) {
-               msg("More than %d block read errors from %d\n",
+               msg("More than %d block read errors from %s\n",
                        BREADEMAX, disk);
                broadcast("DUMP IS AILING!\n");
                msg("This is an unrecoverable error.\n");
@@ -730,11 +736,11 @@
                    ((off_t)blkno << dev_bshift))) == dev_bsize)
                        continue;
                if (cnt == -1) {
-                       msg("read error from %s: %s: [sector %d]: count=%d\n",
+                       msg("read error from %s: %s: [sector %qd]: count=%d\n",
                                disk, strerror(errno), blkno, dev_bsize);
                        continue;
                }
-               msg("short read error from %s: [sector %d]: count=%d, got=%d\n",
+               msg("short read error from %s: [sector %qd]: count=%d, got=%d\n",
                        disk, blkno, dev_bsize, cnt);
        }
 }



To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-current" in the body of the message

Reply via email to