Author: kib
Date: Tue May  5 10:34:43 2009
New Revision: 191813
URL: http://svn.freebsd.org/changeset/base/191813

Log:
  MFC r189878:
  Fix two issues with bufdaemon, often causing the processes to hang in
  the "nbufkv" sleep.
  
  Do not retry request for the new block from ffs_bufwrite() when write is done
  from bufdaemon and there is a buffer shortage.
  
  In getnewbuf(), help bufdaemon to flush dirty buffers owned by the vnode
  locked by curthread.
  
  For MFC, default value for sysctl vfs.flushbufqtarget is set to -1,
  disabling the helpers. The TDP_BUFNEED flag value from HEAD conflicts
  with TDP_UPCALLING KSE bit, so it is moved to the end of allocated bits.

Modified:
  stable/7/sys/   (props changed)
  stable/7/sys/contrib/pf/   (props changed)
  stable/7/sys/dev/ath/ath_hal/   (props changed)
  stable/7/sys/dev/cxgb/   (props changed)
  stable/7/sys/gnu/fs/xfs/FreeBSD/xfs_buf.c
  stable/7/sys/kern/vfs_bio.c
  stable/7/sys/sys/buf.h
  stable/7/sys/sys/proc.h
  stable/7/sys/ufs/ffs/ffs_vfsops.c

Modified: stable/7/sys/gnu/fs/xfs/FreeBSD/xfs_buf.c
==============================================================================
--- stable/7/sys/gnu/fs/xfs/FreeBSD/xfs_buf.c   Tue May  5 09:24:20 2009        (r191812)
+++ stable/7/sys/gnu/fs/xfs/FreeBSD/xfs_buf.c   Tue May  5 10:34:43 2009        (r191813)
@@ -81,7 +81,7 @@ xfs_buf_get_empty(size_t size,  xfs_buft
 {
        struct buf *bp;
 
-       bp = geteblk(0);
+       bp = geteblk(0, 0);
        if (bp != NULL) {
                bp->b_bufsize = size;
                bp->b_bcount = size;
@@ -101,7 +101,7 @@ xfs_buf_get_noaddr(size_t len, xfs_bufta
        if (len >= MAXPHYS)
                return (NULL);
 
-       bp = geteblk(len);
+       bp = geteblk(len, 0);
        if (bp != NULL) {
                KASSERT(BUF_REFCNT(bp) == 1,
                        ("xfs_buf_get_empty: bp %p not locked",bp));

Modified: stable/7/sys/kern/vfs_bio.c
==============================================================================
--- stable/7/sys/kern/vfs_bio.c Tue May  5 09:24:20 2009        (r191812)
+++ stable/7/sys/kern/vfs_bio.c Tue May  5 10:34:43 2009        (r191813)
@@ -105,7 +105,8 @@ static void vfs_setdirty_locked_object(s
 static void vfs_vmio_release(struct buf *bp);
 static int vfs_bio_clcheck(struct vnode *vp, int size,
                daddr_t lblkno, daddr_t blkno);
-static int flushbufqueues(int, int);
+static int buf_do_flush(struct vnode *vp);
+static int flushbufqueues(struct vnode *, int, int);
 static void buf_daemon(void);
 static void bremfreel(struct buf *bp);
 
@@ -187,6 +188,9 @@ SYSCTL_INT(_vfs, OID_AUTO, getnewbufcall
 static int getnewbufrestarts;
 SYSCTL_INT(_vfs, OID_AUTO, getnewbufrestarts, CTLFLAG_RW, &getnewbufrestarts, 0,
     "Number of times getnewbuf has had to restart a buffer aquisition");
+static int flushbufqtarget = -1;
+SYSCTL_INT(_vfs, OID_AUTO, flushbufqtarget, CTLFLAG_RW, &flushbufqtarget, 0,
+    "Amount of work to do in flushbufqueues when helping bufdaemon");
 
 /*
  * Wakeup point for bufdaemon, as well as indicator of whether it is already
@@ -258,6 +262,7 @@ static struct mtx bpinlock;
 #define QUEUE_DIRTY_GIANT 3    /* B_DELWRI buffers that need giant */
 #define QUEUE_EMPTYKVA 4       /* empty buffer headers w/KVA assignment */
 #define QUEUE_EMPTY    5       /* empty buffer headers */
+#define QUEUE_SENTINEL 1024    /* not an queue index, but mark for sentinel */
 
 /* Queues for free buffers with various properties */
 static TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES] = { { 0 } };
@@ -1707,21 +1712,23 @@ vfs_bio_awrite(struct buf *bp)
  */
 
 static struct buf *
-getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
+getnewbuf(struct vnode *vp, int slpflag, int slptimeo, int size, int maxsize,
+    int gbflags)
 {
+       struct thread *td;
        struct buf *bp;
        struct buf *nbp;
        int defrag = 0;
        int nqindex;
        static int flushingbufs;
 
+       td = curthread;
        /*
         * We can't afford to block since we might be holding a vnode lock,
         * which may prevent system daemons from running.  We deal with
         * low-memory situations by proactively returning memory and running
         * async I/O rather then sync I/O.
         */
-
        atomic_add_int(&getnewbufcalls, 1);
        atomic_subtract_int(&getnewbufrestarts, 1);
 restart:
@@ -1953,8 +1960,9 @@ restart:
         */
 
        if (bp == NULL) {
-               int flags;
+               int flags, norunbuf;
                char *waitmsg;
+               int fl;
 
                if (defrag) {
                        flags = VFS_BIO_NEED_BUFSPACE;
@@ -1972,9 +1980,35 @@ restart:
                mtx_unlock(&bqlock);
 
                bd_speedup();   /* heeeelp */
+               if (gbflags & GB_NOWAIT_BD)
+                       return (NULL);
 
                mtx_lock(&nblock);
                while (needsbuffer & flags) {
+                       if (vp != NULL && (td->td_pflags & TDP_BUFNEED) == 0) {
+                               mtx_unlock(&nblock);
+                               /*
+                                * getblk() is called with a vnode
+                                * locked, and some majority of the
+                                * dirty buffers may as well belong to
+                                * the vnode. Flushing the buffers
+                                * there would make a progress that
+                                * cannot be achieved by the
+                                * buf_daemon, that cannot lock the
+                                * vnode.
+                                */
+                               norunbuf = ~(TDP_BUFNEED | TDP_NORUNNINGBUF) |
+                                   (td->td_pflags & TDP_NORUNNINGBUF);
+                               /* play bufdaemon */
+                               td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF;
+                               fl = buf_do_flush(vp);
+                               td->td_pflags &= norunbuf;
+                               mtx_lock(&nblock);
+                               if (fl != 0)
+                                       continue;
+                               if ((needsbuffer & flags) == 0)
+                                       break;
+                       }
                        if (msleep(&needsbuffer, &nblock,
                            (PRIBIO + 4) | slpflag, waitmsg, slptimeo)) {
                                mtx_unlock(&nblock);
@@ -2043,6 +2077,35 @@ static struct kproc_desc buf_kp = {
 };
 SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp);
 
+static int
+buf_do_flush(struct vnode *vp)
+{
+       int flushed;
+
+       flushed = flushbufqueues(vp, QUEUE_DIRTY, 0);
+       /* The list empty check here is slightly racy */
+       if (!TAILQ_EMPTY(&bufqueues[QUEUE_DIRTY_GIANT])) {
+               mtx_lock(&Giant);
+               flushed += flushbufqueues(vp, QUEUE_DIRTY_GIANT, 0);
+               mtx_unlock(&Giant);
+       }
+       if (flushed == 0) {
+               /*
+                * Could not find any buffers without rollback
+                * dependencies, so just write the first one
+                * in the hopes of eventually making progress.
+                */
+               flushbufqueues(vp, QUEUE_DIRTY, 1);
+               if (!TAILQ_EMPTY(
+                           &bufqueues[QUEUE_DIRTY_GIANT])) {
+                       mtx_lock(&Giant);
+                       flushbufqueues(vp, QUEUE_DIRTY_GIANT, 1);
+                       mtx_unlock(&Giant);
+               }
+       }
+       return (flushed);
+}
+
 static void
 buf_daemon()
 {
@@ -2056,7 +2119,7 @@ buf_daemon()
        /*
         * This process is allowed to take the buffer cache to the limit
         */
-       curthread->td_pflags |= TDP_NORUNNINGBUF;
+       curthread->td_pflags |= TDP_NORUNNINGBUF | TDP_BUFNEED;
        mtx_lock(&bdlock);
        for (;;) {
                bd_request = 0;
@@ -2071,30 +2134,8 @@ buf_daemon()
                 * normally would so they can run in parallel with our drain.
                 */
                while (numdirtybuffers > lodirtybuffers) {
-                       int flushed;
-
-                       flushed = flushbufqueues(QUEUE_DIRTY, 0);
-                       /* The list empty check here is slightly racy */
-                       if (!TAILQ_EMPTY(&bufqueues[QUEUE_DIRTY_GIANT])) {
-                               mtx_lock(&Giant);
-                               flushed += flushbufqueues(QUEUE_DIRTY_GIANT, 0);
-                               mtx_unlock(&Giant);
-                       }
-                       if (flushed == 0) {
-                               /*
-                                * Could not find any buffers without rollback
-                                * dependencies, so just write the first one
-                                * in the hopes of eventually making progress.
-                                */
-                               flushbufqueues(QUEUE_DIRTY, 1);
-                               if (!TAILQ_EMPTY(
-                                   &bufqueues[QUEUE_DIRTY_GIANT])) {
-                                       mtx_lock(&Giant);
-                                       flushbufqueues(QUEUE_DIRTY_GIANT, 1);
-                                       mtx_unlock(&Giant);
-                               }
+                       if (buf_do_flush(NULL) == 0)
                                break;
-                       }
                        uio_yield();
                }
 
@@ -2140,7 +2181,7 @@ SYSCTL_INT(_vfs, OID_AUTO, flushwithdeps
     0, "Number of buffers flushed with dependecies that require rollbacks");
 
 static int
-flushbufqueues(int queue, int flushdeps)
+flushbufqueues(struct vnode *lvp, int queue, int flushdeps)
 {
        struct thread *td = curthread;
        struct buf sentinel;
@@ -2151,20 +2192,37 @@ flushbufqueues(int queue, int flushdeps)
        int flushed;
        int target;
 
-       target = numdirtybuffers - lodirtybuffers;
-       if (flushdeps && target > 2)
-               target /= 2;
+       if (lvp == NULL) {
+               target = numdirtybuffers - lodirtybuffers;
+               if (flushdeps && target > 2)
+                       target /= 2;
+       } else
+               target = flushbufqtarget;
        flushed = 0;
        bp = NULL;
+       sentinel.b_qindex = QUEUE_SENTINEL;
        mtx_lock(&bqlock);
-       TAILQ_INSERT_TAIL(&bufqueues[queue], &sentinel, b_freelist);
+       TAILQ_INSERT_HEAD(&bufqueues[queue], &sentinel, b_freelist);
        while (flushed != target) {
-               bp = TAILQ_FIRST(&bufqueues[queue]);
-               if (bp == &sentinel)
+               bp = TAILQ_NEXT(&sentinel, b_freelist);
+               if (bp != NULL) {
+                       TAILQ_REMOVE(&bufqueues[queue], &sentinel, b_freelist);
+                       TAILQ_INSERT_AFTER(&bufqueues[queue], bp, &sentinel,
+                           b_freelist);
+               } else
                        break;
-               TAILQ_REMOVE(&bufqueues[queue], bp, b_freelist);
-               TAILQ_INSERT_TAIL(&bufqueues[queue], bp, b_freelist);
-
+               /*
+                * Skip sentinels inserted by other invocations of the
+                * flushbufqueues(), taking care to not reorder them.
+                */
+               if (bp->b_qindex == QUEUE_SENTINEL)
+                       continue;
+               /*
+                * Only flush the buffers that belong to the
+                * vnode locked by the curthread.
+                */
+               if (lvp != NULL && bp->b_vp != lvp)
+                       continue;
                if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
                        continue;
                if (bp->b_pin_count > 0) {
@@ -2212,16 +2270,28 @@ flushbufqueues(int queue, int flushdeps)
                        BUF_UNLOCK(bp);
                        continue;
                }
-               if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT, td) == 0) {
+               if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_CANRECURSE,
+                   td) == 0) {
                        mtx_unlock(&bqlock);
                        CTR3(KTR_BUF, "flushbufqueue(%p) vp %p flags %X",
                            bp, bp->b_vp, bp->b_flags);
-                       vfs_bio_awrite(bp);
+                       if (curproc == bufdaemonproc)
+                               vfs_bio_awrite(bp);
+                       else {
+                               bremfree(bp);
+                               bwrite(bp);
+                       }
                        vn_finished_write(mp);
                        VOP_UNLOCK(vp, 0, td);
                        flushwithdeps += hasdeps;
                        flushed++;
-                       waitrunningbufspace();
+
+                       /*
+                        * Sleeping on runningbufspace while holding
+                        * vnode lock leads to deadlock.
+                        */
+                       if (curproc == bufdaemonproc)
+                               waitrunningbufspace();
                        numdirtywakeup((lodirtybuffers + hidirtybuffers) / 2);
                        mtx_lock(&bqlock);
                        continue;
@@ -2603,7 +2673,7 @@ loop:
                maxsize = vmio ? size + (offset & PAGE_MASK) : size;
                maxsize = imax(maxsize, bsize);
 
-               bp = getnewbuf(slpflag, slptimeo, size, maxsize);
+               bp = getnewbuf(vp, slpflag, slptimeo, size, maxsize, flags);
                if (bp == NULL) {
                        if (slpflag || slptimeo)
                                return NULL;
@@ -2678,14 +2748,17 @@ loop:
  * set to B_INVAL.
  */
 struct buf *
-geteblk(int size)
+geteblk(int size, int flags)
 {
        struct buf *bp;
        int maxsize;
 
        maxsize = (size + BKVAMASK) & ~BKVAMASK;
-       while ((bp = getnewbuf(0, 0, size, maxsize)) == 0)
-               continue;
+       while ((bp = getnewbuf(NULL, 0, 0, size, maxsize, flags)) == NULL) {
+               if ((flags & GB_NOWAIT_BD) &&
+                   (curthread->td_pflags & TDP_BUFNEED) != 0)
+                       return (NULL);
+       }
        allocbuf(bp, size);
        bp->b_flags |= B_INVAL; /* b_dep cleared by getnewbuf() */
        KASSERT(BUF_REFCNT(bp) == 1, ("geteblk: bp %p not locked",bp));

Modified: stable/7/sys/sys/buf.h
==============================================================================
--- stable/7/sys/sys/buf.h      Tue May  5 09:24:20 2009        (r191812)
+++ stable/7/sys/sys/buf.h      Tue May  5 10:34:43 2009        (r191813)
@@ -475,6 +475,7 @@ buf_countdeps(struct buf *bp, int i)
  */
 #define        GB_LOCK_NOWAIT  0x0001          /* Fail if we block on a buf lock. */
 #define        GB_NOCREAT      0x0002          /* Don't create a buf if not found. */
+#define        GB_NOWAIT_BD    0x0004          /* Do not wait for bufdaemon */
 
 #ifdef _KERNEL
 extern int     nbuf;                   /* The number of buffer headers */
@@ -519,7 +520,7 @@ struct buf *     getpbuf(int *);
 struct buf *incore(struct bufobj *, daddr_t);
 struct buf *gbincore(struct bufobj *, daddr_t);
 struct buf *getblk(struct vnode *, daddr_t, int, int, int, int);
-struct buf *geteblk(int);
+struct buf *geteblk(int, int);
 int    bufwait(struct buf *);
 int    bufwrite(struct buf *);
 void   bufdone(struct buf *);

Modified: stable/7/sys/sys/proc.h
==============================================================================
--- stable/7/sys/sys/proc.h     Tue May  5 09:24:20 2009        (r191812)
+++ stable/7/sys/sys/proc.h     Tue May  5 10:34:43 2009        (r191813)
@@ -380,6 +380,7 @@ do {                                                                        \
 #define        TDP_INBDFLUSH   0x00100000 /* Already in BO_BDFLUSH, do not recurse */
 #define        TDP_IGNSUSP     0x00800000 /* Permission to ignore the MNTK_SUSPEND* */
 #define        TDP_AUDITREC    0x01000000 /* Audit record pending on thread */
+#define        TDP_BUFNEED     0x02000000 /* Do not recurse into the buf flush */
 
 /*
  * Reasons that the current thread can not be run yet.

Modified: stable/7/sys/ufs/ffs/ffs_vfsops.c
==============================================================================
--- stable/7/sys/ufs/ffs/ffs_vfsops.c   Tue May  5 09:24:20 2009        (r191812)
+++ stable/7/sys/ufs/ffs/ffs_vfsops.c   Tue May  5 10:34:43 2009        (r191813)
@@ -1842,7 +1842,9 @@ ffs_bufwrite(struct buf *bp)
                    ("bufwrite: needs chained iodone (%p)", bp->b_iodone));
 
                /* get a new block */
-               newbp = geteblk(bp->b_bufsize);
+               newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD);
+               if (newbp == NULL)
+                       goto normal_write;
 
                /*
                 * set it to be identical to the old block.  We have to
@@ -1882,6 +1884,7 @@ ffs_bufwrite(struct buf *bp)
        }
 
        /* Let the normal bufwrite do the rest for us */
+normal_write:
        return (bufwrite(bp));
 }
 
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to