The attached patch does not break with the namei tradition of fsyncing the link count file at every change, although the need for this is still debatable.

Rather, it groups fsyncs to the link count file in batches, with a final fsync prior to releasing the volume back to the file server. It hence speeds up clone and related operations (vos backup, vos move, salvage, etc.) tremendously, most noticeably for volumes with a large number of files (hundreds of thousands), where we observed speedups in excess of 200...


(Bcc-ed to openafs-bugs)



--
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
Rainer Toebbicke
European Laboratory for Particle Physics(CERN) - Geneva, Switzerland
Phone: +41 22 767 8985       Fax: +41 22 767 7155
unchanged:
--- openafs/src/vol/ihandle.h.o1381     2004-08-25 09:14:19.000000000 +0200
+++ openafs/src/vol/ihandle.h   2005-05-24 15:22:17.000000000 +0200
@@ -221,6 +221,7 @@
 
 /* Flags for the Inode handle */
 #define IH_REALLY_CLOSED               1
+#define IH_DELAY_SYNC                  16
 
 /* Hash function for inode handles */
 #define I_HANDLE_HASH_SIZE     1024    /* power of 2 */
@@ -466,7 +467,7 @@
 #define FDH_WRITE(H, B, S) OS_WRITE((H)->fd_fd, B, S)
 #define FDH_SEEK(H, O, F) OS_SEEK((H)->fd_fd, O, F)
 
-#define FDH_SYNC(H) OS_SYNC((H)->fd_fd)
+#define FDH_SYNC(H) (((H)->fd_ih->ih_flags & IH_DELAY_SYNC) ? 0 : OS_SYNC((H)->fd_fd)) /* yields 0 without syncing while IH_DELAY_SYNC is set; evaluates H twice */
 #define FDH_TRUNC(H, L) OS_TRUNC((H)->fd_fd, L)
 #define FDH_SIZE(H) OS_SIZE((H)->fd_fd)
 
unchanged:
--- openafs/src/vol/purge.c.o1381       2004-08-25 09:14:19.000000000 +0200
+++ openafs/src/vol/purge.c     2005-05-25 11:13:16.000000000 +0200
@@ -89,7 +89,7 @@
     VOL_UNLOCK;
 }
 
-#define MAXOBLITATONCE 200
+#define MAXOBLITATONCE 1000
 /* delete a portion of an index, adjusting offset appropriately.  Returns 0 if
    things work and we should be called again, 1 if success full and done, and -1
    if an error occurred.  It adjusts offset appropriately on 0 or 1 return codes,
@@ -148,10 +148,13 @@
     OS_SYNC(afile->str_fd);
 
     /* finally, do the idec's */
+    V_linkHandle(avp)->ih_flags|=IH_DELAY_SYNC;                /* severe performance penalty */
     for (i = 0; i < iindex; i++) {
        IH_DEC(V_linkHandle(avp), inodes[i], V_parentId(avp));
        DOPOLL;
     }
+    V_linkHandle(avp)->ih_flags&=~IH_DELAY_SYNC;
+    IH_CONDSYNC(V_linkHandle(avp));
 
     /* return the new offset */
     *aoffset = offset;
unchanged:
--- openafs/src/vol/clone.c.o1381       2004-08-25 09:14:19.000000000 +0200
+++ openafs/src/vol/clone.c     2005-05-27 09:39:04.000000000 +0200
@@ -227,6 +227,12 @@
     decRock.vol = V_parentId(rwvp);
 
     /* Read each vnode in the old volume's index file */
+    /* fsyncing the link count file for every inode has a severe 
+       performance penalty, therefore we turn it off temporarily.
+       This assumes we're the only one on that file/volume  -
+       in particular when we force the fsync later!
+    */
+    V_linkHandle(rwvp)->ih_flags|=IH_DELAY_SYNC;
     for (offset = vcp->diskSize;
         STREAM_READ(rwvnode, vcp->diskSize, 1, rwfile) == 1;
         offset += vcp->diskSize) {
@@ -345,6 +351,12 @@
      * and shouldn't do the idecs.
      */
   error_exit:
+    /* Now take the fsync-bypass away again and force an fsync.
+       Again: assumes we're alone on this file, otherwise we need a lock!
+    */
+    V_linkHandle(rwvp)->ih_flags&=~IH_DELAY_SYNC;
+    IH_CONDSYNC(V_linkHandle(rwvp));
+
     if (rwfile)
        STREAM_CLOSE(rwfile);
     if (clfilein)
unchanged:
--- openafs/src/vol/namei_ops.c.o1382   2004-11-09 18:16:40.000000000 +0100
+++ openafs/src/vol/namei_ops.c 2005-06-01 16:40:28.000000000 +0200
@@ -572,6 +572,8 @@
 
     if (p2 == -1 && p3 == VI_LINKTABLE) {
        /* hack at tmp to setup for set link count call. */
+       memset((void *)&tfd, 0, sizeof(FdHandle_t));    /* minimalistic still, but a little cleaner */
+       tfd.fd_ih = &tmp;
        tfd.fd_fd = fd;
        code = namei_SetLinkCount(&tfd, (Inode) 0, 1, 0);
     }
only in patch2:
unchanged:
--- openafs/src/volser/dumpstuff.c.1rig 2005-11-02 12:39:29.000000000 +0100
+++ openafs/src/volser/dumpstuff.c      2006-02-14 14:49:58.000000000 +0100
@@ -1033,7 +1033,13 @@
 
     tdelo = delo;
     while (1) {
-       if (ReadVnodes(iodp, vp, 0, b1, s1, b2, s2, tdelo)) {
+       int temprc;
+
+       V_linkHandle(avp)->ih_flags |= IH_DELAY_SYNC;   /* Avoid repetitive fdsync()s on linkfile */
+       temprc = ReadVnodes(iodp, vp, 0, b1, s1, b2, s2, tdelo);
+       V_linkHandle(avp)->ih_flags &= ~IH_DELAY_SYNC;  /* normal sync behaviour again */
+       IH_CONDSYNC(V_linkHandle(avp));                 /* sync link file */
+       if (temprc) {
            error = VOLSERREAD_DUMPERROR;
            goto clean;
        }

Reply via email to